Index: projects/clang1000-import/bin/pwait/pwait.1 =================================================================== --- projects/clang1000-import/bin/pwait/pwait.1 (revision 357178) +++ projects/clang1000-import/bin/pwait/pwait.1 (revision 357179) @@ -1,101 +1,103 @@ .\" .\" Copyright (c) 2004-2009, Jilles Tjoelker .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with .\" or without modification, are permitted provided that the .\" following conditions are met: .\" .\" 1. Redistributions of source code must retain the above .\" copyright notice, this list of conditions and the .\" following disclaimer. .\" 2. Redistributions in binary form must reproduce the .\" above copyright notice, this list of conditions and .\" the following disclaimer in the documentation and/or .\" other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND .\" CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED .\" WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED .\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A .\" PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE .\" COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY .\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, .\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF .\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER .\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING .\" NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE .\" USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY .\" OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd March 7, 2017 +.Dd January 26, 2020 .Dt PWAIT 1 .Os .Sh NAME .Nm pwait .Nd wait for processes to terminate .Sh SYNOPSIS .Nm .Op Fl t Ar duration -.Op Fl v +.Op Fl ov .Ar pid \&... .Sh DESCRIPTION The .Nm utility will wait until each of the given processes has terminated. .Pp The following options are available: .Bl -tag -width indent +.It Fl o +Exit when any of the given processes has terminated. .It Fl t Ar duration If any process is still running after .Ar duration , .Nm will exit. The .Ar duration value can be an integer or a decimal number. Values without unit symbols are interpreted as seconds. .Pp Supported unit symbols are: .Bl -tag -width indent -compact .It s seconds .It m minutes .It h hours .El .It Fl v Print the exit status when each process terminates. .El .Sh EXIT STATUS The .Nm utility exits 0 on success, and >0 if an error occurs. .Pp If the .Fl t flag is specified and a timeout occurs, the exit status will be 124. .Pp Invalid pids elicit a warning message but are otherwise ignored. .Sh SEE ALSO .Xr kill 1 , .Xr pkill 1 , .Xr ps 1 , .Xr wait 1 , .Xr kqueue 2 .Sh NOTES .Nm is not a substitute for the .Xr wait 1 builtin as it will not clean up any zombies or state in the parent process. .Sh HISTORY A .Nm command first appeared in SunOS 5.8. Index: projects/clang1000-import/bin/pwait/pwait.c =================================================================== --- projects/clang1000-import/bin/pwait/pwait.c (revision 357178) +++ projects/clang1000-import/bin/pwait/pwait.c (revision 357179) @@ -1,195 +1,219 @@ /*- * Copyright (c) 2004-2009, Jilles Tjoelker * All rights reserved. * * Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the * following conditions are met: * * 1.
Redistributions of source code must retain the above * copyright notice, this list of conditions and the * following disclaimer. * 2. Redistributions in binary form must reproduce the * above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or * other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include static void usage(void) { - fprintf(stderr, "usage: pwait [-t timeout] [-v] pid ...\n"); - exit(EX_USAGE); + errx(EX_USAGE, "usage: pwait [-t timeout] [-ov] pid ..."); } /* * pwait - wait for processes to terminate */ int main(int argc, char *argv[]) { struct itimerval itv; - int kq; struct kevent *e; - int tflag, verbose; - int opt, nleft, n, i, duplicate, status; + int oflag, tflag, verbose; + int i, kq, n, nleft, opt, status; long pid; - char *s, *end; + char *end, *s; double timeout; - tflag = verbose = 0; + oflag = 0; + tflag = 0; + verbose = 0; memset(&itv, 0, sizeof(itv)); - while ((opt = getopt(argc, argv, "t:v")) != -1) { + + while ((opt = getopt(argc, argv, "ot:v")) != -1) { switch (opt) { + case 'o': + oflag = 1; + break; case 't': tflag = 1; errno = 0; timeout = strtod(optarg, &end); - if (end == optarg || errno == ERANGE || - timeout < 0) + if (end == optarg || errno == ERANGE || timeout < 0) { errx(EX_DATAERR, "timeout value"); + } switch(*end) { case 0: case 's': break; case 'h': timeout *= 60; /* FALLTHROUGH */ case 'm': timeout *= 60; break; default: errx(EX_DATAERR, "timeout unit"); } - if (timeout > 100000000L) + if (timeout > 100000000L) { errx(EX_DATAERR, "timeout value"); + } itv.it_value.tv_sec = (time_t)timeout; timeout -= (time_t)timeout; itv.it_value.tv_usec = (suseconds_t)(timeout * 1000000UL); break; case 'v': verbose = 1; break; default: usage(); /* NOTREACHED */ } } argc -= optind; argv += optind; - if (argc == 0) + if (argc == 0) { usage(); + } kq = kqueue(); - if (kq == -1) - err(1, "kqueue"); + if (kq == -1) { + err(EX_OSERR, "kqueue"); + } e = malloc((argc + tflag) * sizeof(struct kevent)); - if (e == NULL) - err(1, "malloc"); + if (e == NULL) { + err(EX_OSERR, "malloc"); + } nleft = 0; for (n = 0; n < argc; n++) { s = argv[n]; - if (!strncmp(s, "/proc/", 6)) /* Undocumented Solaris compat */ + /* Undocumented Solaris compat */ + if (!strncmp(s, "/proc/", 6)) { s += 6; + } errno = 0; pid = strtol(s, &end, 10); if (pid < 0 || *end != '\0' || errno != 0) { warnx("%s: bad process id", s); continue; } - duplicate = 0; - for (i = 0; i < nleft; i++) - if (e[i].ident == (uintptr_t)pid) - duplicate = 1; - if (!duplicate) { - EV_SET(e + nleft, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, - 0, NULL); - if (kevent(kq, 
e + nleft, 1, NULL, 0, NULL) == -1) - warn("%ld", pid); - else - nleft++; + for (i = 0; i < nleft; i++) { + if (e[i].ident == (uintptr_t)pid) { + break; + } } + if (i < nleft) { + /* Duplicate. */ + continue; + } + EV_SET(e + nleft, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL); + if (kevent(kq, e + nleft, 1, NULL, 0, NULL) == -1) { + warn("%ld", pid); + if (oflag) { + exit(EX_OK); + } + } else { + nleft++; + } } - if (tflag) { + if (nleft > 0 && tflag) { /* * Explicitly detect SIGALRM so that an exit status of 124 * can be returned rather than 142. */ EV_SET(e + nleft, SIGALRM, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); - if (kevent(kq, e + nleft, 1, NULL, 0, NULL) == -1) + if (kevent(kq, e + nleft, 1, NULL, 0, NULL) == -1) { err(EX_OSERR, "kevent"); + } /* Ignore SIGALRM to not interrupt kevent(2). */ signal(SIGALRM, SIG_IGN); - if (setitimer(ITIMER_REAL, &itv, NULL) == -1) + if (setitimer(ITIMER_REAL, &itv, NULL) == -1) { err(EX_OSERR, "setitimer"); + } } while (nleft > 0) { n = kevent(kq, NULL, 0, e, nleft + tflag, NULL); - if (n == -1) - err(1, "kevent"); + if (n == -1) { + err(EX_OSERR, "kevent"); + } for (i = 0; i < n; i++) { if (e[i].filter == EVFILT_SIGNAL) { - if (verbose) + if (verbose) { printf("timeout\n"); - return (124); + } + exit(124); } if (verbose) { status = e[i].data; - if (WIFEXITED(status)) + if (WIFEXITED(status)) { printf("%ld: exited with status %d.\n", (long)e[i].ident, WEXITSTATUS(status)); - else if (WIFSIGNALED(status)) + } else if (WIFSIGNALED(status)) { printf("%ld: killed by signal %d.\n", (long)e[i].ident, WTERMSIG(status)); - else + } else { printf("%ld: terminated.\n", (long)e[i].ident); + } + } + if (oflag) { + exit(EX_OK); } --nleft; } } exit(EX_OK); } Index: projects/clang1000-import/bin/pwait/tests/pwait_test.sh =================================================================== --- projects/clang1000-import/bin/pwait/tests/pwait_test.sh (revision 357178) +++ projects/clang1000-import/bin/pwait/tests/pwait_test.sh (revision 357179) @@ -1,242 +1,322 @@ # $FreeBSD$ atf_test_case basic basic_head() { atf_set "descr" "Basic tests on pwait(1) utility" } basic_body() { sleep 1 & p1=$! sleep 5 & p5=$! sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:0 \ timeout --preserve-status 15 pwait $p1 $p5 $p10 atf_check \ -o empty \ -e inline:"kill: $p1: No such process\n" \ -s exit:1 \ kill -0 $p1 atf_check \ -o empty \ -e inline:"kill: $p5: No such process\n" \ -s exit:1 \ kill -0 $p5 atf_check \ -o empty \ -e inline:"kill: $p10: No such process\n" \ -s exit:1 \ kill -0 $p10 } basic_cleanup() { kill $p1 $p5 $p10 >/dev/null 2>&1 wait $p1 $p5 $p10 >/dev/null 2>&1 } atf_test_case time_unit time_unit_head() { atf_set "descr" "Test parsing the timeout unit and value" } time_unit_body() { init=1 atf_check \ -o empty \ -e inline:"pwait: timeout unit\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t 1d $init atf_check \ -o empty \ -e inline:"pwait: timeout unit\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t 1d $init atf_check \ -o empty \ -e inline:"pwait: timeout value\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t -1 $init atf_check \ -o empty \ -e inline:"pwait: timeout value\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t 100000001 $init # These long duration cases are expected to timeout from the # timeout utility rather than pwait -t. 
atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 100000000 $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1h $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1.5h $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1m $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1.5m $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 0 $init # The rest are fast enough that pwait -t is expected to trigger # the timeout. atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1s $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1.5s $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1 $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1.5 $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 0.5 $init } atf_test_case timeout_trigger_timeout timeout_trigger_timeout_head() { atf_set "descr" "Test that exceeding the timeout is detected" } timeout_trigger_timeout_body() { sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 6.5 pwait -t 5 $p10 } timeout_trigger_timeout_cleanup() { kill $p10 >/dev/null 2>&1 wait $p10 >/dev/null 2>&1 } atf_test_case timeout_no_timeout timeout_no_timeout_head() { atf_set "descr" "Test that not exceeding the timeout continues to wait" } timeout_no_timeout_body() { sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:0 \ timeout --preserve-status 11.5 pwait -t 12 $p10 } timeout_no_timeout_cleanup() { kill $p10 >/dev/null 2>&1 wait $p10 >/dev/null 2>&1 } atf_test_case timeout_many timeout_many_head() { atf_set "descr" "Test timeout on many processes" } timeout_many_body() { sleep 1 & p1=$! sleep 5 & p5=$! sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 7.5 pwait -t 6 $p1 $p5 $p10 } timeout_many_cleanup() { kill $p1 $p5 $p10 >/dev/null 2>&1 wait $p1 $p5 $p10 >/dev/null 2>&1 } +atf_test_case or_flag +or_flag_head() +{ + atf_set "descr" "Test OR flag" +} + +or_flag_body() +{ + sleep 2 & + p2=$! + + sleep 4 & + p4=$! + + sleep 6 & + p6=$! 
+ + atf_check \ + -o inline:"$p2: exited with status 0.\n" \ + -e empty \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o -v $p2 $p4 $p6 + + atf_check \ + -o empty \ + -e inline:"pwait: $p2: No such process\n" \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p2 $p4 $p6 + + atf_check \ + -o empty \ + -e empty \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p4 $p6 + + atf_check \ + -o empty \ + -e inline:"pwait: $p4: No such process\n" \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p4 $p6 + + atf_check \ + -o inline:"$p6: exited with status 0.\n" \ + -e empty \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o -v $p6 + + atf_check \ + -o empty \ + -e inline:"pwait: $p6: No such process\n" \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p6 + + atf_check \ + -o empty \ + -e inline:"kill: $p2: No such process\n" \ + -s exit:1 \ + kill -0 $p2 + + atf_check \ + -o empty \ + -e inline:"kill: $p4: No such process\n" \ + -s exit:1 \ + kill -0 $p4 + + atf_check \ + -o empty \ + -e inline:"kill: $p6: No such process\n" \ + -s exit:1 \ + kill -0 $p6 + +} + +or_flag_cleanup() +{ + kill $p2 $p4 $p6 >/dev/null 2>&1 + wait $p2 $p4 $p6 >/dev/null 2>&1 +} + atf_init_test_cases() { atf_add_test_case basic atf_add_test_case time_unit atf_add_test_case timeout_trigger_timeout atf_add_test_case timeout_no_timeout atf_add_test_case timeout_many + atf_add_test_case or_flag } Index: projects/clang1000-import/contrib/libxo/configure.ac =================================================================== --- projects/clang1000-import/contrib/libxo/configure.ac (revision 357178) +++ projects/clang1000-import/contrib/libxo/configure.ac (revision 357179) @@ -1,500 +1,500 @@ # # $Id$ # # See ./INSTALL for more info # # # Release numbering: even numbered dot releases are official ones, and # odd numbers are development ones. The svn version of this file will # only (ONLY!) ever (EVER!) contain odd numbers, so I'll always know if # a particular user has the dist or svn release. # AC_PREREQ(2.2) -AC_INIT([libxo], [1.3.1], [phil@juniper.net]) +AC_INIT([libxo], [1.4.0], [phil@juniper.net]) AM_INIT_AUTOMAKE([-Wall -Werror foreign -Wno-portability]) # Support silent build rules. Requires at least automake-1.11. # Disable with "configure --disable-silent-rules" or "make V=1" m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_PROG_CC AC_PROG_INSTALL AC_CONFIG_MACRO_DIR([m4]) AC_PROG_LN_S # Must be after AC_PROG_AR LT_INIT([dlopen shared]) AC_PATH_PROG(BASENAME, basename, /usr/bin/basename) AC_PATH_PROG(BISON, bison, /usr/bin/bison) AC_PATH_PROG(CAT, cat, /bin/cat) AC_PATH_PROG(CHMOD, chmod, /bin/chmod) AC_PATH_PROG(CP, cp, /bin/cp) AC_PATH_PROG(DIFF, diff, /usr/bin/diff) AC_PATH_PROG(MKDIR, mkdir, /bin/mkdir) AC_PATH_PROG(MV, mv, /bin/mv) AC_PATH_PROG(RM, rm, /bin/rm) AC_PATH_PROG(SED, sed, /bin/sed) AC_STDC_HEADERS # Checks for typedefs, structures, and compiler characteristics. AC_C_INLINE AC_TYPE_SIZE_T # Checks for library functions. 
AC_FUNC_ALLOCA AC_FUNC_MALLOC AC_FUNC_REALLOC AC_CHECK_FUNCS([bzero memmove strchr strcspn strerror strspn]) AC_CHECK_FUNCS([sranddev srand strlcpy]) AC_CHECK_FUNCS([fdopen getrusage]) AC_CHECK_FUNCS([gettimeofday ctime]) AC_CHECK_FUNCS([getpass]) AC_CHECK_FUNCS([getprogname]) AC_CHECK_FUNCS([sysctlbyname]) AC_CHECK_FUNCS([flock]) AC_CHECK_FUNCS([asprintf]) AC_CHECK_FUNCS([__flbf]) AC_CHECK_FUNCS([sysctlbyname]) AC_CHECK_HEADERS([dlfcn.h]) AC_CHECK_HEADERS([dlfcn.h]) AC_CHECK_HEADERS([stdio_ext.h]) AC_CHECK_HEADERS([tzfile.h]) AC_CHECK_HEADERS([stdtime/tzfile.h]) AC_CHECK_FUNCS([dlfunc]) AC_CHECK_HEADERS([sys/time.h]) AC_CHECK_HEADERS([ctype.h errno.h stdio.h stdlib.h]) AC_CHECK_HEADERS([string.h sys/param.h unistd.h ]) AC_CHECK_HEADERS([sys/sysctl.h]) AC_CHECK_HEADERS([threads.h]) AC_CHECK_HEADERS([monitor.h]) dnl humanize_number(3) is a great function, but it's not standard. dnl Note Macosx has the function in libutil.a but doesn't ship the dnl header file, so I'll need to carry my own implementation. See: dnl https://devforums.apple.com/thread/271121 AC_CHECK_HEADERS([libutil.h]) AC_CHECK_LIB([util], [humanize_number], [HAVE_HUMANIZE_NUMBER=$ac_cv_header_libutil_h], [HAVE_HUMANIZE_NUMBER=no]) AC_MSG_RESULT(humanize_number results: :${HAVE_HUMANIZE_NUMBER}:${ac_cv_header_libutil_h}:) if test "$HAVE_HUMANIZE_NUMBER" = "yes"; then AC_DEFINE([HAVE_HUMANIZE_NUMBER], [1], [humanize_number(3)]) fi AM_CONDITIONAL([HAVE_HUMANIZE_NUMBER], [test "$HAVE_HUMANIZE_NUMBER" = "yes"]) AC_ARG_ENABLE([gettext], [ --disable-gettext Turn off support for gettext], [GETTEXT_ENABLE=$enableval], [GETTEXT_ENABLE=yes]) dnl Looking for gettext(), assumably in libintl AC_ARG_WITH(gettext, [ --with-gettext=[PFX] Specify location of gettext installation], [GETTEXT_PREFIX=$withval], [GETTEXT_PREFIX=/usr], ) HAVE_GETTEXT=no if test "$GETTEXT_ENABLE" != "no"; then AC_MSG_CHECKING([gettext in ${GETTEXT_PREFIX}]) _save_cflags="$CFLAGS" CFLAGS="$CFLAGS -I${GETTEXT_PREFIX}/include -L${GETTEXT_PREFIX}/lib -Werror -lintl" AC_LINK_IFELSE([AC_LANG_SOURCE([[#include ] [int main() {char *cp = dgettext(NULL, "xx"); return 0; }]])], [HAVE_GETTEXT=yes], [HAVE_GETTEXT=no]) CFLAGS="$_save_cflags" AC_MSG_RESULT([$HAVE_GETTEXT]) if test "$HAVE_GETTEXT" != "yes"; then GETTEXT_PREFIX=/opt/local AC_MSG_CHECKING([gettext in ${GETTEXT_PREFIX}]) _save_cflags="$CFLAGS" CFLAGS="$CFLAGS -I${GETTEXT_PREFIX}/include -L${GETTEXT_PREFIX}/lib -Werror -lintl" AC_LINK_IFELSE([AC_LANG_SOURCE([[#include ] [int main() {char *cp = dgettext(NULL, "xx"); return 0; }]])], [HAVE_GETTEXT=yes], [HAVE_GETTEXT=no]) CFLAGS="$_save_cflags" AC_MSG_RESULT([$HAVE_GETTEXT]) fi if test "$HAVE_GETTEXT" != "yes"; then GETTEXT_PREFIX=/usr/local AC_MSG_CHECKING([gettext in ${GETTEXT_PREFIX}]) _save_cflags="$CFLAGS" CFLAGS="$CFLAGS -I${GETTEXT_PREFIX}/include -L${GETTEXT_PREFIX}/lib -Werror -lintl" AC_LINK_IFELSE([AC_LANG_SOURCE([[#include ] [int main() {char *cp = dgettext(NULL, "xx"); return 0; }]])], [HAVE_GETTEXT=yes], [HAVE_GETTEXT=no]) CFLAGS="$_save_cflags" AC_MSG_RESULT([$HAVE_GETTEXT]) fi fi if test "$HAVE_GETTEXT" = "yes"; then AC_DEFINE([HAVE_GETTEXT], [1], [gettext(3)]) GETTEXT_CFLAGS="-I${GETTEXT_PREFIX}/include" GETTEXT_LIBS="-L${GETTEXT_PREFIX}/lib -lintl" else GETTEXT_PREFIX=none GETTEXT_CFLAGS= GETTEXT_LIBS= fi AC_SUBST(GETTEXT_CFLAGS) AC_SUBST(GETTEXT_LIBS) GETTEXT_LIBDIR=${GETTEXT_PREFIX}/lib AC_SUBST(GETTEXT_LIBDIR) if test -x ${GETTEXT_PREFIX}/bin/msgfmt ; then GETTEXT_BINDIR=${GETTEXT_PREFIX}/bin elif test -x ${GETTEXT_PREFIX}/local/bin/msgfmt ; then 
GETTEXT_BINDIR=${GETTEXT_PREFIX}/local/bin else AC_MSG_NOTICE("could not find msgfmt tool") # Use a (bad) fall back value GETTEXT_BINDIR=${GETTEXT_PREFIX}/bin fi AC_SUBST(GETTEXT_BINDIR) AM_CONDITIONAL([HAVE_GETTEXT], [test "$HAVE_GETTEXT" = "yes"]) dnl Looking for how to do thread-local variables AC_ARG_WITH(threads, [ --with-threads=[STYLE] Specify style of thread-local support (none)], [THREAD_LOCAL=$withval], [THREAD_LOCAL=unknown], ) AC_MSG_CHECKING([thread-locals are ${THREAD_LOCAL}]) if test "$THREAD_LOCAL" = "unknown"; then AC_LINK_IFELSE([AC_LANG_SOURCE([[] [__thread int foo; int main() { foo++; return foo; }]])], [THREAD_LOCAL=before], [THREAD_LOCAL=unknown]) AC_MSG_RESULT([$THREAD_LOCAL]) fi if test "$THREAD_LOCAL" = "unknown"; then AC_LINK_IFELSE([AC_LANG_SOURCE([[] [int __thread foo; int main() { foo++; return foo; }]])], [THREAD_LOCAL=after], [THREAD_LOCAL=unknown]) AC_MSG_RESULT([$THREAD_LOCAL]) fi if test "$THREAD_LOCAL" = "unknown"; then AC_LINK_IFELSE([AC_LANG_SOURCE([[] [__declspec(int) foo; int main() { foo++; return foo; }]])], [THREAD_LOCAL=declspec], [THREAD_LOCAL=unknown]) AC_MSG_RESULT([$THREAD_LOCAL]) fi if test "$THREAD_LOCAL" != "unknown"; then AC_DEFINE_UNQUOTED([HAVE_THREAD_LOCAL], THREAD_LOCAL_${THREAD_LOCAL}, [thread-local setting]) fi dnl Looking for libcrypto.... AC_CHECK_LIB([crypto], [MD5_Init]) AM_CONDITIONAL([HAVE_LIBCRYPTO], [test "$HAVE_LIBCRYPTO" != "no"]) AC_CHECK_MEMBER([struct sockaddr_un.sun_len], [HAVE_SUN_LEN=yes ; AC_DEFINE([HAVE_SUN_LEN], [1], [Have struct sockaddr_un.sun_len])], [HAS_SUN_LEN=no], [[#include ]]) AC_CHECK_DECLS([__isthreaded], [], [], [#include ]) HAVE_ISTHREADED=${ac_cv_have_decl___isthreaded} dnl dnl Some packages need to be checked against version numbers so we dnl define a function here for later use dnl AC_DEFUN([VERSION_TO_NUMBER], [`$1 | sed -e 's/lib.* //' | awk 'BEGIN { FS = "."; } { printf "%d", ([$]1 * 1000 + [$]2) * 1000 + [$]3;}'`]) LIBSLAX_CONFIG_PREFIX="" LIBSLAX_SRC="" AC_ARG_WITH(libslax-prefix, [ --with-libslax-prefix=[PFX] Specify location of libslax config], LIBSLAX_CONFIG_PREFIX=$withval ) AC_MSG_CHECKING(for libslax) if test "x$LIBSLAX_CONFIG_PREFIX" != "x" then SLAX_CONFIG=${LIBSLAX_CONFIG_PREFIX}/bin/slax-config else SLAX_CONFIG=slax-config fi dnl dnl make sure slax-config is executable, dnl test version and init our variables dnl if ${SLAX_CONFIG} --libs > /dev/null 2>&1 then LIBSLAX_VERSION=`$SLAX_CONFIG --version` SLAX_BINDIR="`$SLAX_CONFIG --bindir | head -1`" SLAX_OXTRADOCDIR="`$SLAX_CONFIG --oxtradoc | head -1`" AC_MSG_RESULT($LIBSLAX_VERSION found) HAVE_OXTRADOC=yes else LIBSLAX_VERSION= SLAX_BINDIR= SLAX_OXTRADOCDIR= AC_MSG_RESULT([no]) HAVE_OXTRADOC=no fi AM_CONDITIONAL([HAVE_OXTRADOC], [test "$HAVE_OXTRADOC" != "no"]) AC_SUBST(SLAX_BINDIR) AC_SUBST(SLAX_OXTRADOCDIR) AC_MSG_CHECKING([whether to build with warnings]) AC_ARG_ENABLE([warnings], [ --enable-warnings Turn on compiler warnings], [LIBXO_WARNINGS=$enableval], [LIBXO_WARNINGS=no]) AC_MSG_RESULT([$LIBXO_WARNINGS]) AM_CONDITIONAL([LIBXO_WARNINGS_HIGH], [test "$LIBXO_WARNINGS" != "no"]) AC_MSG_CHECKING([whether to build with debugging]) AC_ARG_ENABLE([debug], [ --enable-debug Turn on debugging], [LIBXO_DEBUG=yes; AC_DEFINE([LIBXO_DEBUG], [1], [Enable debugging])], [LIBXO_DEBUG=no]) AC_MSG_RESULT([$LIBXO_DEBUG]) AM_CONDITIONAL([LIBXO_DEBUG], [test "$LIBXO_DEBUG" != "no"]) AC_MSG_CHECKING([whether to use int return codes]) AC_ARG_ENABLE([int-return-codes], [ --enable-int-return-codes Use int return codes (instead of ssize_t)], 
[USE_INT_RETURN_CODES=yes; AC_DEFINE([USE_INT_RETURN_CODES], [1], [Use int return codes])], [USE_INT_RETURN_CODES=no]) AC_MSG_RESULT([$USE_INT_RETURN_CODES]) AC_MSG_CHECKING([whether to build with text-only rendering]) AC_ARG_ENABLE([text-only], [ --enable-text-only Turn on text-only rendering], [LIBXO_TEXT_ONLY=yes; AC_DEFINE([LIBXO_TEXT_ONLY], [1], [Enable text-only rendering])], [LIBXO_TEXT_ONLY=no]) AC_MSG_RESULT([$LIBXO_TEXT_ONLY]) AM_CONDITIONAL([LIBXO_TEXT_ONLY], [test "$LIBXO_TEXT_ONLY" != "no"]) AC_MSG_CHECKING([whether to build with local wcwidth implementation]) AC_ARG_ENABLE([wcwidth], [ --disable-wcwidth Disable local wcwidth implementation], [LIBXO_WCWIDTH=$enableval], [LIBXO_WCWIDTH=yes]) AC_MSG_RESULT([$LIBXO_WCWIDTH]) if test "${LIBXO_WCWIDTH}" != "no"; then AC_DEFINE([LIBXO_WCWIDTH], [1], [Enable local wcwidth implementation]) fi AC_MSG_CHECKING([retain hash bucket size]) AC_ARG_WITH(retain-size, [ --with-retain-size=[DIR] Specify retain hash bucket size (in bits)], [XO_RETAIN_SIZE=$withval], [XO_RETAIN_SIZE=default] ) AC_MSG_RESULT([$XO_RETAIN_SIZE]) if test "${XO_RETAIN_SIZE}" != "default"; then AC_DEFINE_UNQUOTED([XO_RETAIN_SIZE], ${XO_RETAIN_SIZE}, [Retain hash bucket size]) fi AC_CHECK_LIB([m], [lrint]) AM_CONDITIONAL([HAVE_LIBM], [test "$HAVE_LIBM" != "no"]) AC_MSG_CHECKING([compiler for gcc]) HAVE_GCC=no if test "${CC}" != ""; then HAVE_GCC=`${CC} --version 2>&1 | grep GCC` if test "${HAVE_GCC}" != ""; then HAVE_GCC=yes else HAVE_GCC=no fi fi AC_MSG_RESULT([$HAVE_GCC]) AM_CONDITIONAL([HAVE_GCC], [test "$HAVE_GCC" = "yes"]) AC_MSG_CHECKING([whether to build with printflike]) AC_ARG_ENABLE([printflike], [ --enable-printflike Enable use of GCC __printflike attribute], [HAVE_PRINTFLIKE=yes; AC_DEFINE([HAVE_PRINTFLIKE], [1], [Support printflike])], [HAVE_PRINTFLIKE=no]) AC_MSG_RESULT([$HAVE_PRINTFLIKE]) AM_CONDITIONAL([HAVE_PRINTFLIKE], [test "$HAVE_PRINTFLIKE" != ""]) AC_MSG_CHECKING([whether to build with LIBXO_OPTIONS]) AC_ARG_ENABLE([libxo-options], [ --disable-libxo-options Turn off support for LIBXO_OPTIONS], [LIBXO_OPTS=$enableval], [LIBXO_OPTS=yes]) AC_MSG_RESULT([$LIBXO_OPTS]) AM_CONDITIONAL([NO_LIBXO_OPTIONS], [test "$LIBXO_OPTS" != "yes"]) case $host_os in darwin*) LIBTOOL=glibtool XO_LIBEXT=dylib ;; Linux*|linux*) CFLAGS="-D_GNU_SOURCE $CFLAGS" LDFLAGS=-ldl XO_LIBEXT=so ;; cygwin*|CYGWIN*) LDFLAGS=-no-undefined XO_LIBEXT=ddl ;; esac case $prefix in NONE) prefix=/usr/local ;; esac XO_LIBS=-lxo XO_SRCDIR=${srcdir} XO_LIBDIR=${libdir} XO_BINDIR=${bindir} XO_INCLUDEDIR=${includedir} XO_CFLAGS="${CFLAGS}" AC_SUBST(XO_LIBS) AC_SUBST(XO_SRCDIR) AC_SUBST(XO_LIBDIR) AC_SUBST(XO_BINDIR) AC_SUBST(XO_INCLUDEDIR) AC_SUBST(XO_LIBEXT) AC_SUBST(XO_CFLAGS) AC_ARG_WITH(encoder-dir, [ --with-encoder-dir=[DIR] Specify location of encoder libraries], [XO_ENCODERDIR=$withval], [XO_ENCODERDIR=$libdir/libxo/encoder] ) AC_SUBST(XO_ENCODERDIR) AC_ARG_WITH(share-dir, [ --with-share-dir=[DIR] Specify location of shared files], [XO_SHAREDIR=$withval], [XO_SHAREDIR=$datarootdir/libxo] ) XO_SHAREDIR=`echo $XO_SHAREDIR | sed "s;\\${prefix};$prefix;"` AC_SUBST(XO_SHAREDIR) dnl for the spec file RELDATE=`date +'%Y-%m-%d%n'` AC_SUBST(RELDATE) AC_MSG_RESULT(Using configure dir $ac_abs_confdir) if test -d $ac_abs_confdir/.git ; then extra=`git branch | awk '/\*/ { print $2 }'` if test "$extra" != "" -a "$extra" != "master" then LIBXO_VERSION_EXTRA="-git-$extra" fi fi LIBXO_VERSION=$PACKAGE_VERSION LIBXO_VERSION_NUMBER=VERSION_TO_NUMBER(echo $PACKAGE_VERSION) AC_SUBST(LIBXO_VERSION) 
AC_SUBST(LIBXO_VERSION_NUMBER) AC_SUBST(LIBXO_VERSION_EXTRA) AC_DEFINE_UNQUOTED(LIBXO_VERSION, ["$LIBXO_VERSION"], [Version number as dotted value]) AC_DEFINE_UNQUOTED(LIBXO_VERSION_NUMBER, [$LIBXO_VERSION_NUMBER], [Version number as a number]) AC_DEFINE_UNQUOTED(LIBXO_VERSION_STRING, ["$LIBXO_VERSION_NUMBER"], [Version number as string]) AC_DEFINE_UNQUOTED(LIBXO_VERSION_EXTRA, ["$LIBXO_VERSION_EXTRA"], [Version number extra information]) AC_CONFIG_HEADERS([libxo/xo_config.h]) AC_CONFIG_FILES([ Makefile libxo-config xohtml/xohtml.sh libxo/Makefile libxo/add.man encoder/Makefile encoder/cbor/Makefile encoder/csv/Makefile encoder/test/Makefile xo/Makefile xolint/Makefile xohtml/Makefile xopo/Makefile packaging/libxo.pc doc/Makefile doc/top-link.html tests/Makefile tests/core/Makefile tests/gettext/Makefile tests/xo/Makefile packaging/libxo.spec packaging/libxo.rb.base ]) AC_OUTPUT AC_MSG_NOTICE([summary of build options: libxo version: ${VERSION} ${LIBXO_VERSION_EXTRA} host type: ${host} / ${host_os} install prefix: ${prefix} srcdir: ${XO_SRCDIR} libdir: ${XO_LIBDIR} bindir: ${XO_BINDIR} includedir: ${XO_INCLUDEDIR} share dir: ${XO_SHAREDIR} extensions dir: ${XO_ENCODERDIR} oxtradoc dir: ${SLAX_OXTRADOCDIR} compiler: ${CC} (${HAVE_GCC:-no}) compiler flags: ${CFLAGS} library types: Shared=${enable_shared}, Static=${enable_static} warnings: ${LIBXO_WARNINGS:-no} debug: ${LIBXO_DEBUG:-no} printf-like: ${HAVE_PRINTFLIKE:-no} libxo-options: ${LIBXO_OPTS:-no} text-only: ${LIBXO_TEXT_ONLY:-no} gettext: ${HAVE_GETTEXT:-no} (${GETTEXT_PREFIX}) isthreaded: ${HAVE_ISTHREADED:-no} thread-local: ${THREAD_LOCAL:-no} local wcwidth: ${LIBXO_WCWIDTH:-no} retain size: ${XO_RETAIN_SIZE:-no} ]) Index: projects/clang1000-import/contrib/libxo/doc/api.rst =================================================================== --- projects/clang1000-import/contrib/libxo/doc/api.rst (revision 357178) +++ projects/clang1000-import/contrib/libxo/doc/api.rst (revision 357179) @@ -1,1620 +1,1679 @@ .. index:: API The libxo API ============= This section gives details about the functions in libxo, how to call them, and the actions they perform. .. index:: Handles .. _handles: Handles ------- libxo uses "handles" to control its rendering functionality. The handle contains state and buffered data, as well as callback functions to process data. Handles give an abstraction for libxo that encapsulates the state of a stream of output. Handles have the data type "`xo_handle_t`" and are opaque to the caller. The library has a default handle that is automatically initialized. By default, this handle will send text style output (`XO_STYLE_TEXT`) to standard output. The xo_set_style and xo_set_flags functions can be used to change this behavior. For the typical command that is generating output on standard output, there is no need to create an explicit handle, but they are available when needed, e.g., for daemons that generate multiple streams of output. Many libxo functions take a handle as their first parameter; most functions that do not take one use the default handle. Any function taking a handle can be passed NULL to access the default handle. For the convenience of callers, the libxo library includes handle-less functions that implicitly use the default handle. For example, the following are equivalent:: xo_emit("test"); xo_emit_h(NULL, "test"); Handles are created using `xo_create` and destroyed using `xo_destroy`. .. index:: xo_create xo_create ~~~~~~~~~ ..
c:function:: xo_handle_t *xo_create (xo_style_t style, xo_xof_flags_t flags) The `xo_create` function allocates a new handle which can be passed to further libxo function calls. The `xo_handle_t` structure is opaque. :param xo_style_t style: Output style (XO_STYLE\_*) :param xo_xof_flags_t flags: Flags for this handle (XOF\_*) :return: New libxo handle :rtype: xo_handle_t \* :: EXAMPLE: xo_handle_t *xop = xo_create(XO_STYLE_JSON, XOF_WARN | XOF_PRETTY); .... xo_emit_h(xop, "testing\n"); See also :ref:`output-styles` and :ref:`flags`. .. index:: xo_create_to_file .. index:: XOF_CLOSE_FP xo_create_to_file ~~~~~~~~~~~~~~~~~ .. c:function:: xo_handle_t *xo_create_to_file (FILE *fp, unsigned style, unsigned flags) The `xo_create_to_file` function is a convenience function provided for situations when output should be written to a different file rather than the default of standard output. The `XOF_CLOSE_FP` flag can be set on the returned handle to trigger a call to fclose() for the FILE pointer when the handle is destroyed, avoiding the need for the caller to perform this task. :param fp: FILE to use as base for this handle :type fp: FILE * :param xo_style_t style: Output style (XO_STYLE\_*) :param xo_xof_flags_t flags: Flags for this handle (XOF\_*) :return: New libxo handle :rtype: xo_handle_t \* .. index:: xo_set_writer .. index:: xo_write_func_t .. index:: xo_close_func_t .. index:: xo_flush_func_t xo_set_writer ~~~~~~~~~~~~~ .. c:function:: void xo_set_writer (xo_handle_t *xop, void *opaque, \ xo_write_func_t write_func, xo_close_func_t close_func, \ xo_flush_func_t flush_func) The `xo_set_writer` function allows custom functions which can tailor how libxo writes data. The `opaque` argument is recorded and passed back to the functions, allowing the function to acquire context information. The *write_func* function writes data to the output stream. The *close_func* function can release this opaque data and any other resources as needed. The *flush_func* function is called to flush buffered data associated with the opaque object. :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t * :param opaque: Pointer to opaque data passed to the given functions :type opaque: void * :param xo_write_func_t write_func: New write function :param xo_close_func_t close_func: New close function :param xo_flush_func_t flush_func: New flush function :returns: void .. index:: xo_get_style xo_get_style ~~~~~~~~~~~~ .. c:function:: xo_style_t xo_get_style(xo_handle_t *xop) Use the `xo_get_style` function to find the current output style for a given handle. To use the default handle, pass a `NULL` handle. :param xop: Handle to interrogate (or NULL for default handle) :type xop: xo_handle_t * :returns: Output style (XO_STYLE\_*) :rtype: xo_style_t :: EXAMPLE:: style = xo_get_style(NULL); .. index:: XO_STYLE_TEXT .. index:: XO_STYLE_XML .. index:: XO_STYLE_JSON .. index:: XO_STYLE_HTML .. _output-styles: Output Styles (XO_STYLE\_\*) ++++++++++++++++++++++++++++ The libxo functions accept a set of output styles: =============== ========================= Flag Description =============== ========================= XO_STYLE_TEXT Traditional text output XO_STYLE_XML XML encoded data XO_STYLE_JSON JSON encoded data XO_STYLE_HTML HTML encoded data =============== ========================= The "XML", "JSON", and "HTML" output styles all use the UTF-8 character encoding. "TEXT" output uses locale-based encoding. .. index:: xo_set_style xo_set_style ~~~~~~~~~~~~ ..
c:function:: void xo_set_style(xo_handle_t *xop, xo_style_t style) The `xo_set_style` function is used to change the output style setting for a handle. To use the default handle, pass a `NULL` handle. :param xop: Handle to modify :type xop: xo_handle_t * :param xo_style_t style: Output style (XO_STYLE\_*) :returns: void :: EXAMPLE: xo_set_style(NULL, XO_STYLE_XML); .. index:: xo_set_style_name xo_set_style_name ~~~~~~~~~~~~~~~~~ .. c:function:: int xo_set_style_name (xo_handle_t *xop, const char *style) The `xo_set_style_name` function can be used to set the style based on a name encoded as a string. The name can be any of the supported styles: "text", "xml", "json", or "html". :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param style: Text name of the style :type style: const char \* :returns: zero for success, non-zero for error :rtype: int :: EXAMPLE: xo_set_style_name(NULL, "html"); .. index:: xo_set_flags xo_set_flags ~~~~~~~~~~~~ .. c:function:: void xo_set_flags(xo_handle_t *xop, xo_xof_flags_t flags) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param xo_xof_flags_t flags: Flags to add for the handle :returns: void Use the `xo_set_flags` function to turn on flags for a given libxo handle. To use the default handle, pass a `NULL` handle. :: EXAMPLE: xo_set_flags(NULL, XOF_PRETTY | XOF_WARN); .. index:: Flags; XOF_* .. index:: XOF_CLOSE_FP .. index:: XOF_COLOR .. index:: XOF_COLOR_ALLOWED .. index:: XOF_DTRT .. index:: XOF_INFO .. index:: XOF_KEYS .. index:: XOF_NO_ENV .. index:: XOF_NO_HUMANIZE .. index:: XOF_PRETTY .. index:: XOF_UNDERSCORES .. index:: XOF_UNITS .. index:: XOF_WARN .. index:: XOF_WARN_XML .. index:: XOF_XPATH .. index:: XOF_COLUMNS .. index:: XOF_FLUSH .. _flags: Flags (XOF\_\*) +++++++++++++++ The set of valid flags includes: =================== ========================================= Flag Description =================== ========================================= XOF_CLOSE_FP Close file pointer on `xo_destroy` XOF_COLOR Enable color and effects in output XOF_COLOR_ALLOWED Allow color/effect for terminal output XOF_DTRT Enable "do the right thing" mode XOF_INFO Display info data attributes (HTML) XOF_KEYS Emit the key attribute (XML) XOF_NO_ENV Do not use the :ref:`libxo-options` env var XOF_NO_HUMANIZE Do not humanize output (TEXT, HTML) XOF_PRETTY Make "pretty printed" output XOF_UNDERSCORES Replace hyphens with underscores XOF_UNITS Display units (XML, HTML) XOF_WARN Generate warnings for broken calls XOF_WARN_XML Generate warnings in XML on stdout XOF_XPATH Emit XPath expressions (HTML) XOF_COLUMNS Force xo_emit to return columns used XOF_FLUSH Flush output after each `xo_emit` call =================== ========================================= The `XOF_CLOSE_FP` flag will trigger the call of the *close_func* (provided via `xo_set_writer`) when the handle is destroyed. The `XOF_COLOR` flag enables color and effects in output regardless of output device, while the `XOF_COLOR_ALLOWED` flag allows color and effects only if the output device is a terminal. The `XOF_PRETTY` flag requests "pretty printing", which will trigger the addition of indentation and newlines to enhance the readability of XML, JSON, and HTML output. Text output is not affected. The `XOF_WARN` flag requests that warnings will trigger diagnostic output (on standard error) when the library notices errors during operations, or with arguments to functions. Without warnings enabled, such conditions are ignored.
Warnings allow developers to debug their interaction with libxo. The function `xo_failure` can be used as a breakpoint for a debugger, regardless of whether warnings are enabled. If the style is `XO_STYLE_HTML`, the following additional flags can be used: =============== ========================================= Flag Description =============== ========================================= XOF_XPATH Emit "data-xpath" attributes XOF_INFO Emit additional info fields =============== ========================================= The `XOF_XPATH` flag enables the emission of XPath expressions detailing the hierarchy of XML elements used to encode the data field, if the XML style of output were requested. The `XOF_INFO` flag encodes additional informational fields for HTML output. See :ref:`field-information` for details. If the style is `XO_STYLE_XML`, the following additional flags can be used: =============== ========================================= Flag Description =============== ========================================= XOF_KEYS Flag "key" fields for XML =============== ========================================= The `XOF_KEYS` flag adds a "key" attribute to the XML encoding for field definitions that use the "k" modifier. The key attribute has the value "key":: xo_emit("{k:name}", item); XML: <name key="key">truck</name> .. index:: xo_clear_flags xo_clear_flags ++++++++++++++ .. c:function:: void xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param xo_xof_flags_t flags: Flags to clear for the handle :returns: void Use the `xo_clear_flags` function to turn off the given flags in a specific handle. To use the default handle, pass a `NULL` handle. .. index:: xo_set_options xo_set_options ++++++++++++++ .. c:function:: int xo_set_options (xo_handle_t *xop, const char *input) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param input: String containing options to set :type input: const char * :returns: zero for success, non-zero for error :rtype: int The `xo_set_options` function accepts a comma-separated list of output styles and modifier flags and enables them for a specific handle. The options are identical to those listed in :ref:`options`. To use the default handle, pass a `NULL` handle. .. index:: xo_destroy xo_destroy ++++++++++ .. c:function:: void xo_destroy(xo_handle_t *xop) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :returns: void The `xo_destroy` function releases a handle and any resources it is using. Calling `xo_destroy` with a `NULL` handle will release any resources associated with the default handle. .. index:: xo_emit Emitting Content (xo_emit) -------------------------- The functions in this section are used to emit output. The "fmt" argument is a string containing field descriptors as specified in :ref:`format-strings`. The use of a handle is optional and `NULL` can be passed to access the internal "default" handle. See :ref:`handles`. The remaining arguments to `xo_emit` and `xo_emit_h` are a set of arguments corresponding to the fields in the format string. Care must be taken to ensure the argument types match the fields in the format string, since an inappropriate cast can ruin your day. The vap argument to `xo_emit_hv` points to a variable argument list that can be used to retrieve arguments via `va_arg`. .. c:function:: xo_ssize_t xo_emit (const char *fmt, ...)
:param fmt: The format string, followed by zero or more arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_emit_h (xo_handle_t *xop, const char *fmt, ...) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param fmt: The format string, followed by zero or more arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param fmt: The format string :param va_list vap: A set of variadic arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. index:: xo_emit_field Single Field Emitting Functions (xo_emit_field) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The functions in this section also emit output, but only a single field at a time. These functions are intended to avoid the scenario where one would otherwise need to compose a format descriptor using `snprintf`. The individual parts of the format descriptor are passed in distinctly. .. c:function:: xo_ssize_t xo_emit_field (const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) :param rolmod: A comma-separated list of field roles and field modifiers :type rolmod: const char * :param contents: The "contents" portion of the field description string :type contents: const char * :param fmt: Content format string :type fmt: const char * :param efmt: Encoding format string, followed by additional arguments :type efmt: const char * :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t :: EXAMPLE:: xo_emit_field("T", "Host name is ", NULL, NULL); xo_emit_field("V", "host-name", NULL, NULL, host_name); .. c:function:: xo_ssize_t xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param rolmod: A comma-separated list of field roles and field modifiers :type rolmod: const char * :param contents: The "contents" portion of the field description string :type contents: const char * :param fmt: Content format string :type fmt: const char * :param efmt: Encoding format string, followed by additional arguments :type efmt: const char * :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, va_list vap) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param rolmod: A comma-separated list of field roles and field modifiers :type rolmod: const char * :param contents: The "contents" portion of the field description string :type contents: const char * :param fmt: Content format string :type fmt: const char * :param efmt: Encoding format string :type efmt: const char * :param va_list vap: A set of variadic arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. index:: xo_attr ..
_xo_attr: Attributes (xo_attr) ~~~~~~~~~~~~~~~~~~~~ The functions in this section emit an XML attribute with the given name and value. This only affects the XML output style. The `name` parameter gives the name of the attribute to be encoded. The `fmt` parameter gives a printf-style format string used to format the value of the attribute using any remaining arguments, or the vap parameter passed to `xo_attr_hv`. All attributes recorded via `xo_attr` are placed on the next container, instance, leaf, or leaf list that is emitted. Since attributes are only emitted in XML, their use should be limited to meta-data and additional or redundant representations of data already emitted in other form. .. c:function:: xo_ssize_t xo_attr (const char *name, const char *fmt, ...) :param name: Attribute name :type name: const char * :param fmt: Attribute value, as variadic arguments :type fmt: const char * :returns: -1 for error, or the number of bytes in the formatted attribute value :rtype: xo_ssize_t :: EXAMPLE: xo_attr("seconds", "%ld", (unsigned long) login_time); struct tm *tmp = localtime(&login_time); strftime(buf, sizeof(buf), "%R", tmp); xo_emit("Logged in at {:login-time}\n", buf); XML: <login-time seconds="...">00:14</login-time> .. c:function:: xo_ssize_t xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* The `xo_attr_h` function follows the conventions of `xo_attr` but adds an explicit libxo handle. .. c:function:: xo_ssize_t xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap) The `xo_attr_hv` function follows the conventions of `xo_attr_h` but replaces the variadic list with a variadic pointer. .. index:: xo_flush Flushing Output (xo_flush) ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. c:function:: xo_ssize_t xo_flush (void) :returns: -1 for error, or the number of bytes generated :rtype: xo_ssize_t libxo buffers data, both for performance and consistency, but also to allow for the proper function of various advanced features. At various times, the caller may wish to flush any data buffered within the library. The `xo_flush` call is used for this. Calling `xo_flush` also triggers the flush function associated with the handle. For the default handle, this is equivalent to "fflush(stdout);". .. c:function:: xo_ssize_t xo_flush_h (xo_handle_t *xop) :param xop: Handle to flush (or NULL for default handle) :type xop: xo_handle_t \* :returns: -1 for error, or the number of bytes generated :rtype: xo_ssize_t The `xo_flush_h` function follows the conventions of `xo_flush`, but adds an explicit libxo handle. .. index:: xo_finish .. index:: xo_finish_atexit .. index:: atexit Finishing Output (xo_finish) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When the program is ready to exit or close a handle, a call to `xo_finish` or `xo_finish_h` is required. This flushes any buffered data, closes open libxo constructs, and completes any pending operations. Calling this function is vital to the proper operation of libxo, especially for the non-TEXT output styles. .. c:function:: xo_ssize_t xo_finish (void) :returns: -1 on error, or the number of bytes flushed :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_finish_h (xo_handle_t *xop) :param xop: Handle to finish (or NULL for default handle) :type xop: xo_handle_t \* :returns: -1 on error, or the number of bytes flushed :rtype: xo_ssize_t ..
c:function:: void xo_finish_atexit (void) The `xo_finish_atexit` function is suitable for use with :manpage:`atexit(3)` to ensure that `xo_finish` is called on the default handle when the application exits. .. index:: UTF-8 .. index:: xo_open_container .. index:: xo_close_container Emitting Hierarchy ------------------ libxo represents two types of hierarchy: containers and lists. A container appears once under a given parent, whereas a list consists of instances that can appear multiple times. A container is used to hold related fields and to give the data organization and scope. .. index:: YANG .. admonition:: YANG Terminology libxo uses terminology from YANG (:RFC:`7950`), the data modeling language for NETCONF: container, list, leaf, and leaf-list. For XML and JSON, individual fields appear inside hierarchies which provide context and meaning to the fields. Unfortunately, these encodings have a basic disconnect in how lists of similar objects are represented. XML encodes lists as a set of sequential elements:: <user>phil</user> <user>pallavi</user> <user>sjg</user> JSON encodes lists using a single name and square brackets:: "user": [ "phil", "pallavi", "sjg" ] This means libxo needs three distinct indications of hierarchy: one for containers, which appear only once for any specific parent, one for lists, and one for each item in a list. .. index:: Containers Containers ~~~~~~~~~~ A "*container*" is an element of a hierarchy that appears only once under any specific parent. The container has no value, but serves to contain and organize other nodes. To open a container, call xo_open_container() or xo_open_container_h(). The former uses the default handle and the latter accepts a specific handle. To close a level, use the xo_close_container() or xo_close_container_h() functions. Each open call must have a matching close call. If the XOF_WARN flag is set and the name given does not match the name of the currently open container, a warning will be generated. .. c:function:: xo_ssize_t xo_open_container (const char *name) :param name: Name of the container :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `name` parameter gives the name of the container, encoded in UTF-8. Since ASCII is a proper subset of UTF-8, traditional C strings can be used directly. .. c:function:: xo_ssize_t xo_open_container_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_open_container_h` function adds a `handle` parameter. .. c:function:: xo_ssize_t xo_close_container (const char *name) :param name: Name of the container :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_close_container_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_container_h` function adds a `handle` parameter. Use the :index:`XOF_WARN` flag to generate a warning if the name given on the close does not match the current open container. For TEXT and HTML output, containers are not rendered into output text, though for HTML they are used to record an XPath value when the :index:`XOF_XPATH` flag is set. :: EXAMPLE: xo_open_container("top"); xo_open_container("system"); xo_emit("{:host-name/%s%s%s}", hostname, domainname ? "."
: "", domainname ?: ""); xo_close_container("system"); xo_close_container("top"); TEXT: my-host.example.org XML: my-host.example.org JSON: "top" : { "system" : { "host-name": "my-host.example.org" } } HTML:
my-host.example.org
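The example above is a fragment. As a minimal, self-contained sketch of the same container pattern (the hard-coded `hostname` and `domainname` values are assumptions for illustration; `xo_parse_args` and `xo_finish` are covered under Support Functions below)::

    #include <libxo/xo.h>

    int
    main (int argc, char **argv)
    {
        const char *hostname = "my-host";        /* assumed value */
        const char *domainname = "example.org";  /* assumed value */

        argc = xo_parse_args(argc, argv);   /* consume --libxo options */
        if (argc < 0)
            return 1;

        xo_open_container("top");
        xo_open_container("system");
        xo_emit("{:host-name/%s%s%s}\n", hostname,
                domainname ? "." : "", domainname ?: "");
        xo_close_container("system");
        xo_close_container("top");

        xo_finish();    /* flush and close any open constructs */
        return 0;
    }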
.. index:: xo_open_instance .. index:: xo_close_instance .. index:: xo_open_list .. index:: xo_close_list Lists and Instances ~~~~~~~~~~~~~~~~~~~ A "*list*" is a set of one or more instances that appear under the same parent. The instances contain details about a specific object. One can think of instances as objects or records. A call is needed to open and close the list, while a distinct call is needed to open and close each instance of the list. The name given to all calls must be identical, and it is strongly suggested that the name be singular, not plural, as a matter of style and usage expectations:: EXAMPLE: xo_open_list("item"); for (ip = list; ip->i_title; ip++) { xo_open_instance("item"); xo_emit("{L:Item} '{:name/%s}':\n", ip->i_title); xo_close_instance("item"); } xo_close_list("item"); Getting the list and instance calls correct is critical to the proper generation of XML and JSON data. Opening Lists +++++++++++++ .. c:function:: xo_ssize_t xo_open_list (const char *name) :param name: Name of the list :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_open_list` function opens a list of instances. .. c:function:: xo_ssize_t xo_open_list_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * Closing Lists +++++++++++++ .. c:function:: xo_ssize_t xo_close_list (const char *name) :param name: Name of the list :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_close_list` function closes a list of instances. .. c:function:: xo_ssize_t xo_close_list_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_list_h` function adds a `handle` parameter. Opening Instances +++++++++++++++++ .. c:function:: xo_ssize_t xo_open_instance (const char *name) :param name: Name of the instance (same as the list name) :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_open_instance` function opens a single instance. .. c:function:: xo_ssize_t xo_open_instance_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_open_instance_h` function adds a `handle` parameter. Closing Instances +++++++++++++++++ .. c:function:: xo_ssize_t xo_close_instance (const char *name) :param name: Name of the instance :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_close_instance` function closes an open instance. .. c:function:: xo_ssize_t xo_close_instance_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_instance_h` function adds a `handle` parameter.
:: EXAMPLE: xo_open_list("user"); for (i = 0; i < num_users; i++) { xo_open_instance("user"); xo_emit("{k:name}:{:uid/%u}:{:gid/%u}:{:home}\n", pw[i].pw_name, pw[i].pw_uid, pw[i].pw_gid, pw[i].pw_dir); xo_close_instance("user"); } xo_close_list("user"); TEXT: phil:1001:1001:/home/phil pallavi:1002:1002:/home/pallavi XML: <user> <name>phil</name> <uid>1001</uid> <gid>1001</gid> <home>/home/phil</home> </user> <user> <name>pallavi</name> <uid>1002</uid> <gid>1002</gid> <home>/home/pallavi</home> </user> JSON: user: [ { "name": "phil", "uid": 1001, "gid": 1001, "home": "/home/phil", }, { "name": "pallavi", "uid": 1002, "gid": 1002, "home": "/home/pallavi", } ] Markers ~~~~~~~ Markers are used to protect and restore the state of open hierarchy constructs (containers, lists, or instances). While a marker is open, no other open constructs can be closed. When a marker is closed, all constructs open since the marker was opened will be closed. Markers use names which are not user-visible, allowing the caller to choose appropriate internal names. In this example, the code whiffles through a list of fish, calling a function to emit details about each fish. The marker "fish-guts" is used to ensure that any constructs opened by the function are closed properly:: EXAMPLE: for (i = 0; fish[i]; i++) { xo_open_instance("fish"); xo_open_marker("fish-guts"); dump_fish_details(i); xo_close_marker("fish-guts"); } .. c:function:: xo_ssize_t xo_open_marker(const char *name) :param name: Name of the marker :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_open_marker` function records the current state of open tags in order for `xo_close_marker` to close them at some later point. .. c:function:: xo_ssize_t xo_open_marker_h(xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_open_marker_h` function adds a `handle` parameter. .. c:function:: xo_ssize_t xo_close_marker(const char *name) :param name: Name of the marker :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_close_marker` function closes any open containers, lists, or instances as needed to return to the state recorded when `xo_open_marker` was called with the matching name. .. c:function:: xo_ssize_t xo_close_marker_h(xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_marker_h` function adds a `handle` parameter. DTRT Mode ~~~~~~~~~ Some users may find tracking the names of open containers, lists, and instances inconvenient. libxo offers a "Do The Right Thing" mode, where libxo will track the names of open containers, lists, and instances so the close function can be called without a name. To enable DTRT mode, turn on the XOF_DTRT flag prior to making any other libxo output:: xo_set_flags(NULL, XOF_DTRT); .. index:: XOF_DTRT Each open and close function has a version with the suffix "_d", which will close the open container, list, or instance:: xo_open_container_d("top"); ... xo_close_container_d(); This also works for lists and instances:: xo_open_list_d("item"); for (...) { xo_open_instance_d("item"); xo_emit(...); xo_close_instance_d(); } xo_close_list_d(); .. index:: XOF_WARN Note that the XOF_WARN flag will also cause libxo to track open containers, lists, and instances. A warning is generated when the name given to the close function and the name recorded do not match. Support Functions ----------------- .. index:: xo_parse_args .. _xo_parse_args: Parsing Command-line Arguments (xo_parse_args) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ..

.. c:function:: int xo_parse_args (int argc, char **argv)

  :param int argc: Number of arguments
  :param argv: Array of argument strings
  :return: -1 on error, or the number of remaining arguments
  :rtype: int

The `xo_parse_args` function is used to process a program's
arguments.  libxo-specific options are processed and removed from the
argument list so the calling application does not need to process
them.  If successful, a new value for argc is returned.  On failure, a
message is emitted and -1 is returned::

    argc = xo_parse_args(argc, argv);
    if (argc < 0)
        exit(EXIT_FAILURE);

Following the call to xo_parse_args, the application can process the
remaining arguments in a normal manner.  See :ref:`options` for a
description of valid arguments.

.. index:: xo_set_program

xo_set_program
~~~~~~~~~~~~~~

.. c:function:: void xo_set_program (const char *name)

  :param name: Name to use as the program name
  :type name: const char *
  :returns: void

The `xo_set_program` function sets the name of the program as reported
by functions like `xo_failure`, `xo_warn`, `xo_err`, etc.  The program
name is initialized by `xo_parse_args`, but subsequent calls to
`xo_set_program` can override this value::

    EXAMPLE:
        xo_set_program(argv[0]);

Note that the value is not copied, so the memory passed to
`xo_set_program` (and `xo_parse_args`) must be maintained by the
caller.

.. index:: xo_set_version

xo_set_version
~~~~~~~~~~~~~~

.. c:function:: void xo_set_version (const char *version)

  :param version: Value to use as the version string
  :type version: const char *
  :returns: void

The `xo_set_version` function records a version number to be emitted
as part of the data for encoding styles (XML and JSON).  This version
number is suitable for tracking changes in the content, allowing a
user of the data to discern which version of the data model is in
use.

.. c:function:: void xo_set_version_h (xo_handle_t *xop, const char *version)

  :param xop: Handle to use (or NULL for default handle)
  :type xop: xo_handle_t *

The `xo_set_version_h` function adds a `handle` parameter.

.. index:: --libxo
.. index:: XOF_INFO
.. index:: xo_info_t
.. _field-information:

Field Information (xo_info_t)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

HTML data can include additional information in attributes that begin
with "data-".  To enable this, three things must occur:

First the application must build an array of xo_info_t structures, one
per tag.  The array must be sorted by name, since libxo uses a binary
search to find the entry that matches names from format instructions.

Second, the application must inform libxo about this information using
the `xo_set_info` call::

    typedef struct xo_info_s {
        const char *xi_name;    /* Name of the element */
        const char *xi_type;    /* Type of field */
        const char *xi_help;    /* Description of field */
    } xo_info_t;

    void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count);

Like other libxo calls, passing `NULL` for the handle tells libxo to
use the default handle.

If the count is -1, libxo will count the elements of infop, but there
must be an empty element at the end.  More typically, the number is
known to the application::

    xo_info_t info[] = {
        { "in-stock", "number", "Number of items in stock" },
        { "name", "string", "Name of the item" },
        { "on-order", "number", "Number of items on order" },
        { "sku", "string", "Stock Keeping Unit" },
        { "sold", "number", "Number of items sold" },
    };
    int info_count = (sizeof(info) / sizeof(info[0]));
    ...
    xo_set_info(NULL, info, info_count);

Third, the emission of info must be triggered with the `XOF_INFO` flag
using either the `xo_set_flags` function or the "`--libxo=info`"
command line argument.

The type and help values, if present, are emitted as the "data-type"
and "data-help" attributes::

    <div class="data" data-tag="sku" data-type="string"
         data-help="Stock Keeping Unit">GRO-000-533</div>
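
As noted above, the array must be sorted by name.  If it cannot
conveniently be kept sorted by hand, it can be sorted at runtime
before registration; a small sketch using :manpage:`qsort(3)` (the
comparison function is illustrative, not part of the libxo API)::

    #include <stdlib.h>
    #include <string.h>

    /* Compare two xo_info_t entries by name, for qsort(3) */
    static int
    info_compare (const void *lp, const void *rp)
    {
        const xo_info_t *lhs = lp;
        const xo_info_t *rhs = rp;

        return strcmp(lhs->xi_name, rhs->xi_name);
    }

    /* ... */
    qsort(info, info_count, sizeof(info[0]), info_compare);
    xo_set_info(NULL, info, info_count);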

.. c:function:: void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count)

  :param xop: Handle to use (or NULL for default handle)
  :type xop: xo_handle_t *
  :param infop: Array of information structures
  :type infop: xo_info_t *
  :returns: void

.. index:: xo_set_allocator
.. index:: xo_realloc_func_t
.. index:: xo_free_func_t

Memory Allocation
~~~~~~~~~~~~~~~~~

The `xo_set_allocator` function allows libxo to be used in
environments where the standard :manpage:`realloc(3)` and
:manpage:`free(3)` functions are not appropriate.

.. c:function:: void xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func)

  :param xo_realloc_func_t realloc_func: Allocation function
  :param xo_free_func_t free_func: Free function

*realloc_func* should expect the same arguments as
:manpage:`realloc(3)` and return a pointer to memory following the
same convention.  *free_func* will receive the same argument as
:manpage:`free(3)` and should release it, as appropriate for the
environment.

By default, the standard :manpage:`realloc(3)` and :manpage:`free(3)`
functions are used.

.. index:: --libxo
.. _libxo-options:

LIBXO_OPTIONS
~~~~~~~~~~~~~

The environment variable "LIBXO_OPTIONS" can be set to a subset of
libxo options, including:

- color
- flush
- flush-line
- no-color
- no-humanize
- no-locale
- no-retain
- pretty
- retain
- underscores
- warn

For example, warnings can be enabled by::

    % env LIBXO_OPTIONS=warn my-app

Since environment variables are inherited, child processes will have
the same options, which may be undesirable, making the use of the
"`--libxo`" command-line option preferable in most situations.

.. index:: xo_warn
.. index:: xo_err
.. index:: xo_errx
.. index:: xo_message

Errors, Warnings, and Messages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Many programs make use of the standard library functions
:manpage:`err(3)` and :manpage:`warn(3)` to generate errors and
warnings for the user.  libxo wants to pass that information via the
current output style, and provides compatible functions to allow
this::

    void xo_warn (const char *fmt, ...);
    void xo_warnx (const char *fmt, ...);
    void xo_warn_c (int code, const char *fmt, ...);
    void xo_warn_hc (xo_handle_t *xop, int code,
                     const char *fmt, ...);
    void xo_err (int eval, const char *fmt, ...);
    void xo_errc (int eval, int code, const char *fmt, ...);
    void xo_errx (int eval, const char *fmt, ...);

::

    void xo_message (const char *fmt, ...);
    void xo_message_c (int code, const char *fmt, ...);
    void xo_message_hc (xo_handle_t *xop, int code,
                        const char *fmt, ...);
    void xo_message_hcv (xo_handle_t *xop, int code,
                         const char *fmt, va_list vap);

These functions display the program name, a colon, a formatted message
based on the arguments, and then optionally a colon and an error
message associated with either *errno* or the *code* parameter::

    EXAMPLE:
        if (open(filename, O_RDONLY) < 0)
            xo_err(1, "cannot open file '%s'", filename);
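
The `xo_message` variants report informational messages in the same
style-aware way, without exiting.  A brief sketch (the file name and
*st* buffer are hypothetical), passing *errno* via the *code*
parameter::

    if (stat(filename, &st) < 0)
        xo_message_c(errno, "cannot stat file '%s'", filename);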

.. index:: xo_error
+.. index:: xo_error_h
+.. index:: xo_error_hv
+.. index:: xo_errorn
+.. index:: xo_errorn_h
+.. index:: xo_errorn_hv

xo_error
~~~~~~~~

.. c:function:: void xo_error (const char *fmt, ...)

  :param fmt: Format string
  :type fmt: const char *
  :returns: void

+.. c:function:: void xo_error_h (xo_handle_t *xop, const char *fmt, ...)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param fmt: Format string
+  :type fmt: const char *
+  :returns: void
+
+.. c:function:: void xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param fmt: Format string
+  :type fmt: const char *
+  :param vap: variadic arguments
+  :type vap: va_list
+  :returns: void
+
+.. c:function:: void xo_errorn (const char *fmt, ...)
+
+  :param fmt: Format string
+  :type fmt: const char *
+  :returns: void
+
+.. c:function:: void xo_errorn_h (xo_handle_t *xop, const char *fmt, ...)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param fmt: Format string
+  :type fmt: const char *
+  :returns: void
+
+.. c:function:: void xo_errorn_hv (xo_handle_t *xop, int need_newline, const char *fmt, va_list vap)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param need_newline: boolean indicating need for trailing newline
+  :type need_newline: int
+  :param fmt: Format string
+  :type fmt: const char *
+  :param vap: variadic arguments
+  :type vap: va_list
+  :returns: void
+
The `xo_error` function can be used for generic errors that should be
reported over the handle, rather than to stderr.  The `xo_error`
function behaves like `xo_err` for TEXT and HTML output styles, but
puts the error into XML or JSON elements::

    EXAMPLE::
        xo_error("Does not %s", "compute");
    XML::
        <error><message>Does not compute</message></error>
    JSON::
        "error": { "message": "Does not compute" }
+
+The `xo_error_h` and `xo_error_hv` functions add a handle object and
+a variadic-ized parameter to the signature, respectively.
+
+The `xo_errorn` function supplies a newline at the end of the error
+message if the format string does not include one.  The `xo_errorn_h`
+and `xo_errorn_hv` functions add a handle object and a variadic-ized
+parameter to the signature, respectively.  The `xo_errorn_hv`
+function also adds a boolean to indicate the need for a trailing
+newline.

.. index:: xo_no_setlocale
.. index:: Locale

xo_no_setlocale
~~~~~~~~~~~~~~~

.. c:function:: void xo_no_setlocale (void)

libxo automatically initializes the locale based on the settings of
the environment variables LC_CTYPE, LANG, and LC_ALL.  The first of
these variables that is set is used; if none is set, the locale
defaults to "UTF-8".  The caller may wish to avoid this behavior, and
can do so by calling the `xo_no_setlocale` function.

Emitting syslog Messages
------------------------

syslog is the system logging facility used throughout the unix world.
Messages are sent from commands, applications, and daemons to a
hierarchy of servers, where they are filtered, saved, and forwarded
based on configuration behaviors.

syslog is an older protocol, originally documented only in source
code.  By the time :RFC:`3164` was published, variation and mutation
left the leading "<pri>" string as the only common content.
:RFC:`5424` defines a new version (version 1) of syslog and introduces
structured data into the messages.  Structured data is a set of
name/value pairs transmitted distinctly alongside the traditional text
message, allowing filtering on precise values instead of regular
expressions.

These name/value pairs are scoped by a two-part identifier: an
enterprise identifier names the party responsible for the message
catalog, and a name identifies that message.  `Enterprise IDs`_ are
defined by IANA, the Internet Assigned Numbers Authority.

.. _Enterprise IDs:
    https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers

Use the `xo_set_syslog_enterprise_id` function to set the Enterprise
ID, as needed.

The message name should follow the conventions in
:ref:`good-field-names`\ , as should the fields within the message::

    /* Both of these calls are optional */
    xo_set_syslog_enterprise_id(32473);
    xo_open_log("my-program", 0, LOG_DAEMON);

    /* Generate a syslog message */
    xo_syslog(LOG_ERR, "upload-failed",
              "error <%d> uploading file '{:filename}' "
              "as '{:target/%s:%s}'",
              code, filename, protocol, remote);

    xo_syslog(LOG_INFO, "poofd-invalid-state",
              "state {:current/%u} is invalid {:connection/%u}",
              state, conn);

The developer should be aware that the message name may be used in
the future to allow access to further information, including
documentation.  Care should be taken to choose quality, descriptive
names.

.. _syslog-details:

Priority, Facility, and Flags
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The `xo_syslog`, `xo_vsyslog`, and `xo_open_log` functions accept a
set of flags which provide the priority of the message, the source
facility, and some additional features.  These values are OR'd
together to create a single integer argument::

    xo_syslog(LOG_ERR | LOG_AUTH, "login-failed",
              "Login failed; user '{:user}' from host '{:address}'",
              user, addr);

These values are defined in <syslog.h>.

The priority value indicates the importance and potential impact of
each message:

============= =======================================================
Priority      Description
============= =======================================================
LOG_EMERG     A panic condition, normally broadcast to all users
LOG_ALERT     A condition that should be corrected immediately
LOG_CRIT      Critical conditions
LOG_ERR       Generic errors
LOG_WARNING   Warning messages
LOG_NOTICE    Non-error conditions that might need special handling
LOG_INFO      Informational messages
LOG_DEBUG     Developer-oriented messages
============= =======================================================

The facility value indicates the source of the message, in fairly
generic terms:

=============== =======================================================
Facility        Description
=============== =======================================================
LOG_AUTH        The authorization system (e.g. :manpage:`login(1)`)
LOG_AUTHPRIV    As LOG_AUTH, but logged to a privileged file
LOG_CRON        The cron daemon: :manpage:`cron(8)`
LOG_DAEMON      System daemons, not otherwise explicitly listed
LOG_FTP         The file transfer protocol daemons
LOG_KERN        Messages generated by the kernel
LOG_LPR         The line printer spooling system
LOG_MAIL        The mail system
LOG_NEWS        The network news system
LOG_SECURITY    Security subsystems, such as :manpage:`ipfw(4)`
LOG_SYSLOG      Messages generated internally by :manpage:`syslogd(8)`
LOG_USER        Messages generated by user processes (default)
LOG_UUCP        The uucp system
LOG_LOCAL0..7   Reserved for local use
=============== =======================================================

In addition to the values listed above, xo_open_log accepts a set of
additional flags requesting specific logging behaviors:

============ ====================================================
Flag         Description
============ ====================================================
LOG_CONS     If syslogd fails, attempt to write to /dev/console
LOG_NDELAY   Open the connection to :manpage:`syslogd(8)`
             immediately
LOG_PERROR   Write the message also to standard error output
LOG_PID      Log the process id with each message
============ ====================================================
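
Putting these together, a daemon might open its log once and then emit
messages that OR a priority and a facility.  A short sketch using only
the calls and flag values described in this section (the daemon name,
message name, and field are illustrative)::

    /* Log as "poofd", include the pid, connect immediately */
    xo_open_log("poofd", LOG_PID | LOG_NDELAY, LOG_DAEMON);

    xo_syslog(LOG_WARNING | LOG_DAEMON, "poofd-low-memory",
              "free memory is low: {:free-pages/%lu}", free_pages);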

.. index:: xo_syslog

xo_syslog
~~~~~~~~~

.. c:function:: void xo_syslog (int pri, const char *name, const char *fmt, ...)

  :param int pri: syslog priority
  :param name: Name of the syslog event
  :type name: const char *
  :param fmt: Format string, followed by arguments
  :type fmt: const char *
  :returns: void

Use the `xo_syslog` function to generate syslog messages by calling
it with a log priority and facility, a message name, a format string,
and a set of arguments.  The priority/facility argument is discussed
above, as is the message name.

The format string follows the same conventions as `xo_emit`'s format
string, with each field being rendered as an SD-PARAM pair::

    xo_syslog(LOG_ERR, "poofd-missing-file",
              "'{:filename}' not found: {:error/%m}", filename);

    ... [poofd-missing-file@32473 filename="/etc/poofd.conf"
        error="Permission denied"] '/etc/poofd.conf' not found:
        Permission denied

Support functions
~~~~~~~~~~~~~~~~~

.. index:: xo_vsyslog

xo_vsyslog
++++++++++

.. c:function:: void xo_vsyslog (int pri, const char *name, const char *fmt, va_list vap)

  :param int pri: syslog priority
  :param name: Name of the syslog event
  :type name: const char *
  :param fmt: Format string
  :type fmt: const char *
  :param va_list vap: Variadic argument list
  :returns: void

xo_vsyslog is identical in function to xo_syslog, but takes the set
of arguments using a va_list::

    EXAMPLE:
        void
        my_log (const char *name, const char *fmt, ...)
        {
            va_list vap;

            va_start(vap, fmt);
            xo_vsyslog(LOG_ERR, name, fmt, vap);
            va_end(vap);
        }

.. index:: xo_open_log

xo_open_log
+++++++++++

.. c:function:: void xo_open_log (const char *ident, int logopt, int facility)

  :param ident: Identity string prepended to each message
  :type ident: const char *
  :param int logopt: Bit field containing logging options
  :param int facility: Default facility for messages
  :returns: void

xo_open_log functions similarly to :manpage:`openlog(3)`, allowing
customization of the program name, the log facility number, and the
additional option flags described in :ref:`syslog-details`.

.. index:: xo_close_log

xo_close_log
++++++++++++

.. c:function:: void xo_close_log (void)

The `xo_close_log` function is similar to :manpage:`closelog(3)`,
closing the log file and releasing any associated resources.

.. index:: xo_set_logmask

xo_set_logmask
++++++++++++++

.. c:function:: int xo_set_logmask (int maskpri)

  :param int maskpri: the log priority mask
  :returns: The previous log priority mask

The `xo_set_logmask` function is similar to :manpage:`setlogmask(3)`,
restricting the set of generated log events to those whose associated
bit is set in maskpri.  Use `LOG_MASK(pri)` to find the appropriate
bit, or `LOG_UPTO(toppri)` to create a mask for all priorities up to
and including toppri::

    EXAMPLE:
        xo_set_logmask(LOG_UPTO(LOG_WARNING));

.. index:: xo_set_syslog_enterprise_id

xo_set_syslog_enterprise_id
+++++++++++++++++++++++++++

.. c:function:: void xo_set_syslog_enterprise_id (unsigned short eid)

Use the `xo_set_syslog_enterprise_id` to supply a platform- or
application-specific enterprise id.  This value is used in any future
syslog messages.

Ideally, the operating system should supply a default value via the
"kern.syslog.enterprise_id" sysctl value.  Lacking that, the
application should provide a suitable value.

Enterprise IDs are administered by IANA, the Internet Assigned
Numbers Authority.

The complete list of EIDs is on their web site::

    https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers

New EIDs can be requested from IANA using the following page::

    http://pen.iana.org/pen/PenApplication.page

Each software development organization that defines a set of syslog
messages should register their own EID and use that value in their
software to ensure that messages can be uniquely identified by the
combination of EID + message name.

Creating Custom Encoders
------------------------

The number of encoding schemes in current use is staggering, with new
and distinct schemes appearing daily.  While libxo provides XML, JSON,
HTML, and text natively, there are requirements for other encodings.

Rather than bake support for all possible encoders into libxo, the
API allows them to be defined externally.  libxo can then interface
with these encoding modules using a simplistic API.  libxo processes
all function calls, handles state transitions, performs all
formatting, and then passes the results as operations to a customized
encoding function, which implements specific encoding logic as
required.  This means your encoder doesn't need to detect errors with
unbalanced open/close operations but can rely on libxo to pass
correct data.

By making a simple API, libxo internals are not exposed, insulating
the encoder and the library from future or internal changes.

The three elements of the API are:

- loading
- initialization
- operations

The following sections provide details about these topics.

.. index:: CBOR

The libxo source contains an encoder for Concise Binary Object
Representation, aka CBOR (:RFC:`7049`), which can be used as an
example for the API for other encoders.

Loading Encoders
~~~~~~~~~~~~~~~~

Encoders can be registered statically or discovered dynamically.
Applications can choose to call the `xo_encoder_register` function to
explicitly register encoders, but more typically they are built as
shared libraries, placed in the libxo/extensions directory, and
loaded based on name.  libxo looks for a file with the name of the
encoder and an extension of ".enc".  This can be a file or a symlink
to the shared library file that supports the encoder::

    % ls -1 lib/libxo/extensions/*.enc
    lib/libxo/extensions/cbor.enc
    lib/libxo/extensions/test.enc

Encoder Initialization
~~~~~~~~~~~~~~~~~~~~~~

Each encoder must export a symbol used to access the library, which
must have the following signature::

    int xo_encoder_library_init (XO_ENCODER_INIT_ARGS);

`XO_ENCODER_INIT_ARGS` is a macro defined in "xo_encoder.h" that
defines an argument called "arg", a pointer of the type
`xo_encoder_init_args_t`.  This structure contains two fields:

- `xei_version` is the version number of the API as implemented
  within libxo.  This version is currently 1, defined as
  `XO_ENCODER_VERSION`.  This number can be checked to ensure
  compatibility.  The working assumption is that all versions should
  be backward compatible, but each side may need to accurately know
  the version supported by the other side.  `xo_encoder_library_init`
  can optionally check this value, and must then set it to the
  version number used by the encoder, allowing libxo to detect
  version differences and react accordingly.  For example, if version
  2 adds new operations, then libxo will know that an encoding
  library that set `xei_version` to 1 cannot be expected to handle
  those new operations.

- `xei_handler` must be set to a pointer to a function of type
  `xo_encoder_func_t`, as defined in "xo_encoder.h", as sketched
  below.
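
For illustration, a minimal initialization function might look like
the following sketch, modeled on the encoders shipped with libxo
(`my_handler` is a placeholder for the encoder's handler function)::

    int
    xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
    {
        arg->xei_version = XO_ENCODER_VERSION; /* Declare API version */
        arg->xei_handler = my_handler;         /* Register the callback */

        return 0;   /* Zero means success; non-zero fails the handle */
    }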

The handler function takes a set of parameters:

- xop is a pointer to the opaque `xo_handle_t` structure
- op is an integer representing the current operation
- name is a string whose meaning differs by operation
- value is a string whose meaning differs by operation
- private is an opaque structure provided by the encoder

Additional arguments may be added in the future, so handler functions
should use the `XO_ENCODER_HANDLER_ARGS` macro.  An appropriate
"extern" declaration is provided to help catch errors.

Once the encoder initialization function has completed processing, it
should return zero to indicate that no error has occurred.  A
non-zero return code will cause the handle initialization to fail.

Operations
~~~~~~~~~~

The encoder API defines a set of operations representing the
processing model of libxo.  Content is formatted within libxo, and
callbacks are made to the encoder's handler function when data is
ready to be processed:

======================= =======================================
Operation               Meaning (Base function)
======================= =======================================
XO_OP_CREATE            Called when the handle is created
XO_OP_OPEN_CONTAINER    Container opened (xo_open_container)
XO_OP_CLOSE_CONTAINER   Container closed (xo_close_container)
XO_OP_OPEN_LIST         List opened (xo_open_list)
XO_OP_CLOSE_LIST        List closed (xo_close_list)
XO_OP_OPEN_LEAF_LIST    Leaf list opened (xo_open_leaf_list)
XO_OP_CLOSE_LEAF_LIST   Leaf list closed (xo_close_leaf_list)
XO_OP_OPEN_INSTANCE     Instance opened (xo_open_instance)
XO_OP_CLOSE_INSTANCE    Instance closed (xo_close_instance)
XO_OP_STRING            Field with Quoted UTF-8 string
XO_OP_CONTENT           Field with content
XO_OP_FINISH            Finish any pending output
XO_OP_FLUSH             Flush any buffered output
XO_OP_DESTROY           Clean up resources
XO_OP_ATTRIBUTE         An attribute name/value pair
XO_OP_VERSION           A version string
======================= =======================================

For all the open and close operations, the name parameter holds the
name of the construct.  For string, content, and attribute
operations, the name parameter is the name of the field and the value
parameter is the value.  "string" values are differentiated from
"content" values to allow differing treatment of true, false, null,
and numbers from real strings, though content values are formatted as
strings before the handler is called.  For version operations, the
value parameter contains the version.

All strings are encoded in UTF-8.

Index: projects/clang1000-import/contrib/libxo/doc/encoders.rst
===================================================================
--- projects/clang1000-import/contrib/libxo/doc/encoders.rst (revision 357178)
+++ projects/clang1000-import/contrib/libxo/doc/encoders.rst (revision 357179)
@@ -1,269 +1,274 @@
.. index:: encoder

Encoders
========

This section gives an overview of encoders, details on the encoders
that ship with libxo, and documentation for developers of future
encoders.

Overview
--------

The libxo library contains software to generate four "built-in"
formats: text, XML, JSON, and HTML.  These formats are common and
useful, but there are other common and useful formats that users will
want, and including them all in the libxo software would be difficult
and cumbersome.

To allow support for additional encodings, libxo includes a
"pluggable" extension mechanism for dynamically loading new encoders.
libxo-based applications can automatically use any installed encoder.

Use the "encoder=XXX" option to access encoders.

The following example uses the "cbor" encoder, saving the output into
a file::

    df --libxo encoder=cbor > df-output.cbor

Encoders can support specific options that can be accessed by
-following the encoder name with a colon (':') and one of more options,
-separated by a plus sign "+"::
+following the encoder name with a colon (':') or a plus sign ('+') and
+one or more options, separated by the same character::

-    df --libxo encoder=csv:path=filesystem+leaf=name+no-header
+    df --libxo encoder=csv+path=filesystem+leaf=name+no-header
+    df --libxo encoder=csv:path=filesystem:leaf=name:no-header

-This example instructs libxo to load the "csv" encoder and pass the
+These examples instruct libxo to load the "csv" encoder and pass the
following options::

    path=filesystem
    leaf=name
    no-header

Each of these options is interpreted by the encoder, and all such
option names and semantics are specific to the particular encoder.
Refer to the intended encoder for documentation on its options.
+
+The string "@" can be used in place of the string "encoder="::
+
+    df --libxo @csv:no-header

.. _csv_encoder:

CSV - Comma Separated Values
----------------------------

libxo ships with a custom encoder for "CSV" files, a common format
for comma separated values.  The output of the CSV encoder can be
loaded directly into spreadsheets or similar applications.

A standard for CSV files is provided in :RFC:`4180`, but since the
format predates that standard by decades, there are many minor
differences in CSV file consumers and their expectations.  The CSV
encoder has a number of options to tailor output to those
expectations.

Consider the following XML::

    % list-items --libxo xml,pretty
    <top>
      <data>
        <item>
          <sku>GRO-000-415</sku>
          <name>gum</name>
          <sold>1412</sold>
          <in-stock>54</in-stock>
          <on-order>10</on-order>
        </item>
        <item>
          <sku>HRD-000-212</sku>
          <name>rope</name>
          <sold>85</sold>
          <in-stock>4</in-stock>
          <on-order>2</on-order>
        </item>
        <item>
          <sku>HRD-000-517</sku>
          <name>ladder</name>
          <sold>0</sold>
          <in-stock>2</in-stock>
          <on-order>1</on-order>
        </item>
      </data>
    </top>

This output is a list of `instances` (named "item"), each containing
a set of `leafs` ("sku", "name", etc).  The CSV encoder will emit the
leaf values in this output as `fields` inside a CSV `record`, which
is a line containing a set of comma-separated values::

    % list-items --libxo encoder=csv
    sku,name,sold,in-stock,on-order
    GRO-000-415,gum,1412,54,10
    HRD-000-212,rope,85,4,2
    HRD-000-517,ladder,0,2,1

Be aware that since the CSV encoder looks for data instances, when
used with :ref:`xo`, the `--instance` option will be needed::

    % xo --libxo encoder=csv --instance foo 'The {:product} is {:status}\n' stereo "in route"
    product,status
    stereo,in route

.. _csv_path:

The `path` Option
~~~~~~~~~~~~~~~~~

By default, the CSV encoder will attempt to emit any list instance
generated by the application.  In some cases, this may be
unacceptable, and a specific list may be desired.

Use the "path" option to limit the processing of output to a specific
hierarchy.  The path should be one or more names of containers or
lists.

For example, if the "list-items" application generates other lists,
the user can give "path=top/data/item" as a path::

    % list-items --libxo encoder=csv:path=top/data/item
    sku,name,sold,in-stock,on-order
    GRO-000-415,gum,1412,54,10
    HRD-000-212,rope,85,4,2
    HRD-000-517,ladder,0,2,1

Paths are "relative", meaning they need not be a complete set of
names to the list.  This means that "path=item" may be sufficient for
the above example.

.. _csv_leafs:

The `leafs` Option
~~~~~~~~~~~~~~~~~~

The CSV encoding requires that all lines of output have the same
number of fields with the same order.  In contrast, XML and JSON
allow any order (though libxo forces key leafs to appear before other
leafs).

To maintain a consistent set of fields inside the CSV file, the same
set of leafs must be selected from each list item.  By default, the
CSV encoder records the set of leafs that appear in the first list
instance it processes, and extracts only those leafs from future
instances.  If the first instance is missing a leaf that is desired
by the consumer, the "leafs" option can be used to ensure that an
empty value is recorded for instances that lack a particular leaf.

The "leafs" option can also be used to exclude leafs, limiting the
output to only those leafs provided.

In addition, the order of the output fields follows the order in
which the leafs are listed.  "leafs=one.two" and "leafs=two.one" give
distinct output.

So the "leafs" option can be used to expand, limit, and order the set
of leafs.

The value of the leafs option should be one or more leaf names,
separated by a period (".")::

    % list-items --libxo encoder=csv:leafs=sku.on-order
    sku,on-order
    GRO-000-415,10
    HRD-000-212,2
    HRD-000-517,1
    % list-items --libxo encoder=csv:leafs=on-order.sku
    on-order,sku
    10,GRO-000-415
    2,HRD-000-212
    1,HRD-000-517

Note that libxo uses terminology from YANG (:RFC:`7950`), the data
modeling language for NETCONF (:RFC:`6241`), which uses "leafs" as
the plural form of "leaf"; libxo follows that convention.

.. _csv_no_header:

The `no-header` Option
~~~~~~~~~~~~~~~~~~~~~~

CSV files typically begin with a line that defines the fields
included in that file, in an attempt to make the contents
self-defining::

    sku,name,sold,in-stock,on-order
    GRO-000-415,gum,1412,54,10
    HRD-000-212,rope,85,4,2
    HRD-000-517,ladder,0,2,1

There is no reliable mechanism for determining whether this header
line is included, so the consumer must make an assumption.

The CSV encoder defaults to producing the header line, but the
"no-header" option can be included to avoid the header line.

.. _csv_no_quotes:

The `no-quotes` Option
~~~~~~~~~~~~~~~~~~~~~~

:RFC:`4180` specifies that fields containing spaces should be quoted,
but many CSV consumers do not handle quotes.  The "no-quotes" option
instructs the CSV encoder to avoid the use of quotes.

.. _csv_dos:

The `dos` Option
~~~~~~~~~~~~~~~~

:RFC:`4180` defines the end-of-line marker as a carriage return
followed by a newline.  This `CRLF` convention dates from the distant
past, but its use was anchored in the 1980s by the `DOS` operating
system.

The CSV encoder defaults to using the standard Unix end-of-line
marker, a simple newline.  Use the "dos" option to use the `CRLF`
convention.

The Encoder API
---------------

The encoder API consists of three distinct phases:

- loading the encoder
- initializing the encoder
- feeding operations to the encoder

To load the encoder, libxo will open a shared library named::

    ${prefix}/lib/libxo/encoder/${name}.enc

This file is typically a symbolic link to a dynamic library, suitable
for `dlopen`().  libxo looks for a symbol called
`xo_encoder_library_init` inside that library and calls it with the
arguments defined in the header file "xo_encoder.h".  This function
should look as follows::

    int
    xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
    {
        arg->xei_version = XO_ENCODER_VERSION;
        arg->xei_handler = test_handler;

        return 0;
    }

Several features here allow for future compatibility: the macro
XO_ENCODER_INIT_ARGS allows the arguments to this function to change
over time, and the XO_ENCODER_VERSION allows the library to tell
libxo which version of the API it was compiled with.

The function placed in xei_handler should have the following
signature::

    static int
    test_handler (XO_ENCODER_HANDLER_ARGS)
    {
        ...

This function will be called with the "op" codes defined in
"xo_encoder.h".  Each op code represents a distinct event in the
libxo processing model.  For example XO_OP_OPEN_CONTAINER tells the
encoder that a new container has been opened, and the encoder can
behave in an appropriate manner.

Index: projects/clang1000-import/contrib/libxo/doc/options.rst
===================================================================
--- projects/clang1000-import/contrib/libxo/doc/options.rst (revision 357178)
+++ projects/clang1000-import/contrib/libxo/doc/options.rst (revision 357179)
@@ -1,164 +1,184 @@
.. index:: --libxo
.. index:: Options
.. _options:

Command-line Arguments
======================

libxo uses command line options to trigger rendering behavior.  There
are multiple conventions for passing options, all using the
"`--libxo`" option::

    --libxo <options>
    --libxo=<options>
    --libxo:<brief-options>

The *brief-options* is a series of single letter abbreviations, where
the *options* is a comma-separated list of words.  Both provide
access to identical functionality.  The following invocations are all
identical in outcome::

    my-app --libxo warn,pretty arg1
    my-app --libxo=warn,pretty arg1
    my-app --libxo:WP arg1

Programs using libxo are expected to call the xo_parse_args function
to parse these arguments.  See :ref:`xo_parse_args` for details.

Option Keywords
---------------

Options is a comma-separated list of tokens that correspond to output
styles, flags, or features:

=============== =======================================================
Token           Action
=============== =======================================================
color           Enable colors/effects for display styles (TEXT, HTML)
colors=xxxx     Adjust color output values
dtrt            Enable "Do The Right Thing" mode
flush           Flush after every libxo function call
flush-line      Flush after every line (line-buffered)
html            Emit HTML output
indent=xx       Set the indentation level
info            Add info attributes (HTML)
json            Emit JSON output
keys            Emit the key attribute for keys (XML)
log-gettext     Log (via stderr) each gettext(3) string lookup
log-syslog      Log (via stderr) each syslog message (via xo_syslog)
no-humanize     Ignore the {h:} modifier (TEXT, HTML)
no-locale       Do not initialize the locale setting
no-retain       Prevent retaining formatting information
no-top          Do not emit a top set of braces (JSON)
not-first       Pretend the 1st output item was not 1st (JSON)
pretty          Emit pretty-printed output
retain          Force retaining formatting information
text            Emit TEXT output
underscores     Replace XML-friendly "-"s with JSON friendly "_"s
units           Add the 'units' (XML) or 'data-units' (HTML) attribute
warn            Emit warnings when libxo detects bad calls
warn-xml        Emit warnings in XML
xml             Emit XML output
xpath           Add XPath expressions (HTML)
=============== =======================================================

Most of these options are simple and direct, but some require
additional details:

- "colors" is described in :ref:`color-mapping`.

- "flush-line" performs line buffering, even when the output is not
  directed to a TTY device.

- "info" generates additional data for HTML, encoded in attributes
  using names that start with "data-".

- "keys" adds a "key" attribute for XML output to indicate that a
  leaf is an identifier for the list member.

- "no-humanize" avoids "humanizing" numeric output (see
  :ref:`humanize-modifier` for details).

- "no-locale" instructs libxo to avoid translating output to the
  current locale.

- "no-retain" disables the ability of libxo to internally retain
  "compiled" information about formatting strings (see :ref:`retain`
  for details).

- "underscores" can be used with JSON output to change XML-friendly
  names with dashes into JSON-friendly names with underscores.

- "warn" allows libxo to emit warnings on stderr when application
  code makes incorrect calls.

- "warn-xml" causes those warnings to be placed in XML inside the
  output.

Brief Options
-------------

The brief options are simple single-letter aliases to the normal
keywords, as detailed below:

======== =============================================
Option   Action
======== =============================================
c        Enable color/effects for TEXT/HTML
F        Force line-buffered flushing
H        Enable HTML output (XO_STYLE_HTML)
I        Enable info output (XOF_INFO)
i<num>   Indent by <num>
J        Enable JSON output (XO_STYLE_JSON)
k        Add keys to XPATH expressions in HTML
n        Disable humanization (TEXT, HTML)
P        Enable pretty-printed output (XOF_PRETTY)
T        Enable text output (XO_STYLE_TEXT)
U        Add units to HTML output
u        Change "-"s to "_"s in element names (JSON)
W        Enable warnings (XOF_WARN)
X        Enable XML output (XO_STYLE_XML)
x        Enable XPath data (XOF_XPATH)
======== =============================================

.. index:: Colors
.. _color-mapping:

Color Mapping
-------------

The "colors" option takes a value that is a set of mappings from the
pre-defined set of colors to new foreground and background colors.
The value is a series of "fg/bg" values, separated by a "+".  Each
pair of "fg/bg" values gives the colors to which a basic color is
mapped when used as a foreground or background color.  The order of
the mappings is:

- black
- red
- green
- yellow
- blue
- magenta
- cyan
- white

Pairs may be skipped, leaving them mapped as normal, as are missing
pairs or single colors.

For example consider the following xo_emit call::

    xo_emit("{C:fg-red,bg-green}Merry XMas!!{C:}\n");

To turn all colored output to red-on-blue, use eight pairs of
"red/blue" mappings separated by plus signs ("+")::

    --libxo colors=red/blue+red/blue+red/blue+red/blue+\
    red/blue+red/blue+red/blue+red/blue

To turn the red-on-green text to magenta-on-cyan, give a "magenta"
foreground value for red (the second mapping) and a "cyan" background
to green (the third mapping)::

    --libxo colors=+magenta+/cyan

Consider the common situation where blue output looks unreadable on a
terminal session with a black background.  To turn both "blue"
foreground and background output to "yellow", give only the fifth
mapping, skipping the first four mappings with bare plus signs
("+")::

    --libxo colors=++++yellow/yellow
+
+Encoders
+--------
+
+In addition to the four "built-in" formats, libxo supports an
+extensible mechanism for adding encoders.  These are activated
+using the "encoder" keyword::
+
+    --libxo encoder=cbor
+
+The encoder can include encoder-specific options, separated by either
+colons (":") or plus signs ("+")::
+
+    --libxo encoder=csv+path=filesystem+leaf=name+no-header
+    --libxo encoder=csv:path=filesystem:leaf=name:no-header
+
+For brevity, the string "@" can be used in place of the string
+"encoder="::
+
+    df --libxo @csv:no-header

Index: projects/clang1000-import/contrib/libxo/encoder/csv/enc_csv.c
===================================================================
--- projects/clang1000-import/contrib/libxo/encoder/csv/enc_csv.c (revision 357178)
+++ projects/clang1000-import/contrib/libxo/encoder/csv/enc_csv.c (revision 357179)
@@ -1,826 +1,834 @@
/*
 * Copyright (c) 2015, Juniper Networks, Inc.
 * All rights reserved.
 * This SOFTWARE is licensed under the LICENSE provided in the
 * ../Copyright file.  By downloading, installing, copying, or otherwise
 * using the SOFTWARE, you agree to be bound by the terms of that
 * LICENSE.
 * Phil Shafer, August 2015
 */

/*
 * CSV encoder generates comma-separated value files for specific
 * subsets of data.  This is not (and cannot be) a generalized
 * facility, but for specific subsets of data, CSV data can be
 * reasonably generated.  For example, the df XML content:
 *
 *    <filesystem>
 *      <name>procfs</name>
 *      <total-blocks>4</total-blocks>
 *      <used-blocks>4</used-blocks>
 *      <available-blocks>0</available-blocks>
 *      <used-percent>100</used-percent>
 *      <mounted-on>/proc</mounted-on>
 *    </filesystem>
 *
 * could be represented as:
 *
 * #+name,total-blocks,used-blocks,available-blocks,used-percent,mounted-on
 * procfs,4,4,0,100,/proc
 *
 * Data is then constrained to be sibling leaf values.  In addition,
 * singular leafs can also be matched.  The costs include recording
 * the specific leaf names (to ensure consistency) and some
 * buffering.
 *
 * Some escaping is needed for CSV files, following the rules of RFC4180:
 *
 * - Fields containing a line-break, double-quote or commas should be
 *   quoted.  (If they are not, the file will likely be impossible to
 *   process correctly).
 * - A (double) quote character in a field must be represented by two
 *   (double) quote characters.
 * - Leading and trailing whitespace require fields be quoted.
 *
- * Cheesy, but simple.  The RFC also requires MS-DOS end-of-line, which
- * we only do with the "dos" option.  Strange that we still live in a
- * DOS-friendly world, but then again, we make spaceships based on the
- * horse butts (http://www.astrodigital.org/space/stshorse.html).
+ * Cheesy, but simple.  The RFC also requires MS-DOS end-of-line,
+ * which we only do with the "dos" option.  Strange that we still live
+ * in a DOS-friendly world, but then again, we make spaceships based
+ * on the horse butts (http://www.astrodigital.org/space/stshorse.html
+ * though the "built by English expatriates" bit is rubbish; better to
+ * say the first engines used in America were built by Englishmen.)
 */

#include
#include
#include
#include
#include
#include
#include

#include "xo.h"
#include "xo_encoder.h"
#include "xo_buf.h"

#ifndef UNUSED
#define UNUSED __attribute__ ((__unused__))
#endif /* UNUSED */

/*
 * The CSV encoder has three moving parts:
 *
 * - The path holds the path we are matching against
 *   - This is given as input via "options" and does not change
 *
 * - The stack holds the current names of the open elements
 *   - The "open" operations push, while the "close" pop
 *   - Turns out, at this point, the stack is unused, but I've
 *     left "drippings" in the code because I see this as useful
 *     for future features (under CSV_STACK_IS_NEEDED).
 *
 * - The leafs record the current set of leafs
 *   - A key from the parent list counts as a leaf (unless CF_NO_KEYS)
 *   - Once the path is matched, all other leafs at that level are leafs
 *   - Leafs are recorded to get the header comment accurately recorded
 *   - Once the first line is emitted, the set of leafs _cannot_ change
 *
 * We use offsets into the buffers, since we know they can be
 * realloc'd out from under us, as the size increases.  The 'path'
 * is fixed, we allocate it once, so it doesn't need offsets.
*/ typedef struct path_frame_s { char *pf_name; /* Path member name; points into c_path_buf */ uint32_t pf_flags; /* Flags for this path element (PFF_*) */ } path_frame_t; typedef struct stack_frame_s { ssize_t sf_off; /* Element name; offset in c_stack_buf */ uint32_t sf_flags; /* Flags for this frame (SFF_*) */ } stack_frame_t; /* Flags for sf_flags */ typedef struct leaf_s { ssize_t f_name; /* Name of leaf; offset in c_name_buf */ ssize_t f_value; /* Value of leaf; offset in c_value_buf */ uint32_t f_flags; /* Flags for this value (FF_*) */ #ifdef CSV_STACK_IS_NEEDED ssize_t f_depth; /* Depth of stack when leaf was recorded */ #endif /* CSV_STACK_IS_NEEDED */ } leaf_t; /* Flags for f_flags */ #define LF_KEY (1<<0) /* Leaf is a key */ #define LF_HAS_VALUE (1<<1) /* Value has been set */ typedef struct csv_private_s { uint32_t c_flags; /* Flags for this encoder */ /* The path for which we select leafs */ char *c_path_buf; /* Buffer containing path members */ path_frame_t *c_path; /* Array of path members */ ssize_t c_path_max; /* Depth of c_path[] */ ssize_t c_path_cur; /* Current depth in c_path[] */ /* A stack of open elements (xo_op_list, xo_op_container) */ #if CSV_STACK_IS_NEEDED xo_buffer_t c_stack_buf; /* Buffer used for stack content */ stack_frame_t *c_stack; /* Stack of open tags */ ssize_t c_stack_max; /* Maximum stack depth */ #endif /* CSV_STACK_IS_NEEDED */ ssize_t c_stack_depth; /* Current stack depth */ /* List of leafs we are emitting (to ensure consistency) */ xo_buffer_t c_name_buf; /* String buffer for leaf names */ xo_buffer_t c_value_buf; /* String buffer for leaf values */ leaf_t *c_leaf; /* List of leafs */ ssize_t c_leaf_depth; /* Current depth of c_leaf[] (next free) */ ssize_t c_leaf_max; /* Max depth of c_leaf[] */ xo_buffer_t c_data; /* Buffer for creating data */ } csv_private_t; #define C_STACK_MAX 32 /* default c_stack_max */ #define C_LEAF_MAX 32 /* default c_leaf_max */ /* Flags for this structure */ #define CF_HEADER_DONE (1<<0) /* Have already written the header */ #define CF_NO_HEADER (1<<1) /* Do not generate header */ #define CF_NO_KEYS (1<<2) /* Do not generate excess keys */ #define CF_VALUE_ONLY (1<<3) /* Only generate the value */ #define CF_DOS_NEWLINE (1<<4) /* Generate CR-NL, just like MS-DOS */ #define CF_LEAFS_DONE (1<<5) /* Leafs are already been recorded */ #define CF_NO_QUOTES (1<<6) /* Do not generate quotes */ #define CF_RECORD_DATA (1<<7) /* Record all sibling leafs */ #define CF_DEBUG (1<<8) /* Make debug output */ #define CF_HAS_PATH (1<<9) /* A "path" option was provided */ /* * A simple debugging print function, similar to psu_dbg. Controlled by * the undocumented "debug" option. */ static void csv_dbg (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED, const char *fmt, ...) { if (csv == NULL || !(csv->c_flags & CF_DEBUG)) return; va_list vap; va_start(vap, fmt); vfprintf(stderr, fmt, vap); va_end(vap); } /* * Create the private data for this handle, initialize it, and record * the pointer in the handle. 
*/ static int csv_create (xo_handle_t *xop) { csv_private_t *csv = xo_realloc(NULL, sizeof(*csv)); if (csv == NULL) return -1; bzero(csv, sizeof(*csv)); xo_buf_init(&csv->c_data); xo_buf_init(&csv->c_name_buf); xo_buf_init(&csv->c_value_buf); #ifdef CSV_STACK_IS_NEEDED xo_buf_init(&csv->c_stack_buf); #endif /* CSV_STACK_IS_NEEDED */ xo_set_private(xop, csv); return 0; } /* * Clean up and release any data in use by this handle */ static void csv_destroy (xo_handle_t *xop UNUSED, csv_private_t *csv) { /* Clean up */ xo_buf_cleanup(&csv->c_data); xo_buf_cleanup(&csv->c_name_buf); xo_buf_cleanup(&csv->c_value_buf); #ifdef CSV_STACK_IS_NEEDED xo_buf_cleanup(&csv->c_stack_buf); #endif /* CSV_STACK_IS_NEEDED */ if (csv->c_leaf) xo_free(csv->c_leaf); if (csv->c_path_buf) xo_free(csv->c_path_buf); } /* * Return the element name at the top of the path stack. This is the * item that we are currently trying to match on. */ static const char * csv_path_top (csv_private_t *csv, ssize_t delta) { if (!(csv->c_flags & CF_HAS_PATH) || csv->c_path == NULL) return NULL; ssize_t cur = csv->c_path_cur + delta; if (cur < 0) return NULL; return csv->c_path[cur].pf_name; } /* * Underimplemented stack functionality */ static inline void csv_stack_push (csv_private_t *csv UNUSED, const char *name UNUSED) { #ifdef CSV_STACK_IS_NEEDED csv->c_stack_depth += 1; #endif /* CSV_STACK_IS_NEEDED */ } /* * Underimplemented stack functionality */ static inline void csv_stack_pop (csv_private_t *csv UNUSED, const char *name UNUSED) { #ifdef CSV_STACK_IS_NEEDED csv->c_stack_depth -= 1; #endif /* CSV_STACK_IS_NEEDED */ } /* Flags for csv_quote_flags */ #define QF_NEEDS_QUOTES (1<<0) /* Needs to be quoted */ #define QF_NEEDS_ESCAPE (1<<1) /* Needs to be escaped */ /* * Determine how much quote processing is needed. The details of the * quoting rules are given at the top of this file. We return a set * of flags, indicating what's needed. */ static uint32_t csv_quote_flags (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED, const char *value) { static const char quoted[] = "\n\r\","; static const char escaped[] = "\""; if (csv->c_flags & CF_NO_QUOTES) /* User doesn't want quotes */ return 0; size_t len = strlen(value); uint32_t rc = 0; if (strcspn(value, quoted) != len) rc |= QF_NEEDS_QUOTES; else if (isspace((int) value[0])) /* Leading whitespace */ rc |= QF_NEEDS_QUOTES; else if (isspace((int) value[len - 1])) /* Trailing whitespace */ rc |= QF_NEEDS_QUOTES; if (strcspn(value, escaped) != len) rc |= QF_NEEDS_ESCAPE; csv_dbg(xop, csv, "csv: quote flags [%s] -> %x (%zu/%zu)\n", value, rc, len, strcspn(value, quoted)); return rc; } /* * Escape the string, following the rules in RFC4180 */ static void csv_escape (xo_buffer_t *xbp, const char *value, size_t len) { const char *cp, *ep, *np; for (cp = value, ep = value + len; cp && cp < ep; cp = np) { np = strchr(cp, '"'); if (np) { np += 1; xo_buf_append(xbp, cp, np - cp); xo_buf_append(xbp, "\"", 1); } else xo_buf_append(xbp, cp, ep - cp); } } /* * Append a newline to the buffer, following the settings of the "dos" * flag. */ static void csv_append_newline (xo_buffer_t *xbp, csv_private_t *csv) { if (csv->c_flags & CF_DOS_NEWLINE) xo_buf_append(xbp, "\r\n", 2); else xo_buf_append(xbp, "\n", 1); } /* * Create a 'record' of 'fields' from our recorded leaf values. If * this is the first line and "no-header" isn't given, make a record * containing the leaf names. 
*/ static void csv_emit_record (xo_handle_t *xop, csv_private_t *csv) { csv_dbg(xop, csv, "csv: emit: ...\n"); ssize_t fnum; uint32_t quote_flags; leaf_t *lp; /* If we have no data, then don't bother */ if (csv->c_leaf_depth == 0) return; if (!(csv->c_flags & (CF_HEADER_DONE | CF_NO_HEADER))) { csv->c_flags |= CF_HEADER_DONE; for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; const char *name = xo_buf_data(&csv->c_name_buf, lp->f_name); if (fnum != 0) xo_buf_append(&csv->c_data, ",", 1); xo_buf_append(&csv->c_data, name, strlen(name)); } csv_append_newline(&csv->c_data, csv); } for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; const char *value; if (lp->f_flags & LF_HAS_VALUE) { value = xo_buf_data(&csv->c_value_buf, lp->f_value); } else { value = ""; } quote_flags = csv_quote_flags(xop, csv, value); if (fnum != 0) xo_buf_append(&csv->c_data, ",", 1); if (quote_flags & QF_NEEDS_QUOTES) xo_buf_append(&csv->c_data, "\"", 1); if (quote_flags & QF_NEEDS_ESCAPE) csv_escape(&csv->c_data, value, strlen(value)); else xo_buf_append(&csv->c_data, value, strlen(value)); if (quote_flags & QF_NEEDS_QUOTES) xo_buf_append(&csv->c_data, "\"", 1); } csv_append_newline(&csv->c_data, csv); /* We flush if either flush flag is set */ if (xo_get_flags(xop) & (XOF_FLUSH | XOF_FLUSH_LINE)) xo_flush_h(xop); /* Clean out values from leafs */ for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; lp->f_flags &= ~LF_HAS_VALUE; lp->f_value = 0; } xo_buf_reset(&csv->c_value_buf); /* * Once we emit the first line, our set of leafs is locked and * cannot be changed. */ csv->c_flags |= CF_LEAFS_DONE; } /* * Open a "level" of hierarchy, either a container or an instance. Look * for a match in the path=x/y/z hierarchy, and ignore if not a match. * If we're at the end of the path, start recording leaf values. */ static int csv_open_level (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name, int instance) { /* An new "open" event means we stop recording */ if (csv->c_flags & CF_RECORD_DATA) { csv->c_flags &= ~CF_RECORD_DATA; csv_emit_record(xop, csv); return 0; } const char *path_top = csv_path_top(csv, 0); /* If the top of the stack does not match the name, then ignore */ if (path_top == NULL) { if (instance && !(csv->c_flags & CF_HAS_PATH)) { csv_dbg(xop, csv, "csv: recording (no-path) ...\n"); csv->c_flags |= CF_RECORD_DATA; } } else if (xo_streq(path_top, name)) { csv->c_path_cur += 1; /* Advance to next path member */ csv_dbg(xop, csv, "csv: match: [%s] (%zd/%zd)\n", name, csv->c_path_cur, csv->c_path_max); /* If we're all the way thru the path members, start recording */ if (csv->c_path_cur == csv->c_path_max) { csv_dbg(xop, csv, "csv: recording ...\n"); csv->c_flags |= CF_RECORD_DATA; } } /* Push the name on the stack */ csv_stack_push(csv, name); return 0; } /* * Close a "level", either a container or an instance. 
*/ static int csv_close_level (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name) { /* If we're recording, a close triggers an emit */ if (csv->c_flags & CF_RECORD_DATA) { csv->c_flags &= ~CF_RECORD_DATA; csv_emit_record(xop, csv); } const char *path_top = csv_path_top(csv, -1); csv_dbg(xop, csv, "csv: close: [%s] [%s] (%zd)\n", name, path_top ?: "", csv->c_path_cur); /* If the top of the stack does not match the name, then ignore */ if (path_top != NULL && xo_streq(path_top, name)) { csv->c_path_cur -= 1; return 0; } /* Pop the name off the stack */ csv_stack_pop(csv, name); return 0; } /* * Return the index of a given leaf in the c_leaf[] array, where we * record leaf values. If the leaf is new and we haven't stopped recording * leafs, then make a new slot for it and record the name. */ static int csv_leaf_num (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name, xo_xff_flags_t flags) { ssize_t fnum; leaf_t *lp; xo_buffer_t *xbp = &csv->c_name_buf; for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; const char *fname = xo_buf_data(xbp, lp->f_name); if (xo_streq(fname, name)) return fnum; } /* If we're done with adding new leafs, then bail */ if (csv->c_flags & CF_LEAFS_DONE) return -1; /* This leaf does not exist yet, so we need to create it */ /* Start by checking if there's enough room */ if (csv->c_leaf_depth + 1 >= csv->c_leaf_max) { /* Out of room; realloc it */ ssize_t new_max = csv->c_leaf_max * 2; if (new_max == 0) new_max = C_LEAF_MAX; lp = xo_realloc(csv->c_leaf, new_max * sizeof(*lp)); if (lp == NULL) return -1; /* No luck; bail */ /* Zero out the new portion */ bzero(&lp[csv->c_leaf_max], csv->c_leaf_max * sizeof(*lp)); /* Update csv data */ csv->c_leaf = lp; csv->c_leaf_max = new_max; } lp = &csv->c_leaf[csv->c_leaf_depth++]; #ifdef CSV_STACK_IS_NEEDED lp->f_depth = csv->c_stack_depth; #endif /* CSV_STACK_IS_NEEDED */ lp->f_name = xo_buf_offset(xbp); char *cp = xo_buf_cur(xbp); xo_buf_append(xbp, name, strlen(name) + 1); if (flags & XFF_KEY) lp->f_flags |= LF_KEY; csv_dbg(xop, csv, "csv: leaf: name: %zd [%s] [%s] %x\n", fnum, name, cp, lp->f_flags); return fnum; } /* * Record a new value for a leaf */ static void csv_leaf_set (xo_handle_t *xop UNUSED, csv_private_t *csv, leaf_t *lp, const char *value) { xo_buffer_t *xbp = &csv->c_value_buf; lp->f_value = xo_buf_offset(xbp); lp->f_flags |= LF_HAS_VALUE; char *cp = xo_buf_cur(xbp); xo_buf_append(xbp, value, strlen(value) + 1); csv_dbg(xop, csv, "csv: leaf: value: [%s] [%s] %x\n", value, cp, lp->f_flags); } /* * Record the requested set of leaf names. The input should be a set * of leaf names, separated by periods. */ static int csv_record_leafs (xo_handle_t *xop, csv_private_t *csv, const char *leafs_raw) { char *cp, *ep, *np; ssize_t len = strlen(leafs_raw); char *leafs_buf = alloca(len + 1); memcpy(leafs_buf, leafs_raw, len + 1); /* Make local copy */ for (cp = leafs_buf, ep = leafs_buf + len; cp && cp < ep; cp = np) { np = strchr(cp, '.'); if (np) *np++ = '\0'; if (*cp == '\0') /* Skip empty names */ continue; csv_dbg(xop, csv, "adding leaf: [%s]\n", cp); csv_leaf_num(xop, csv, cp, 0); } /* * Since we've been told explicitly what leafs matter, ignore the rest */ csv->c_flags |= CF_LEAFS_DONE; return 0; } /* * Record the requested path elements. The input should be a set of * container or instances names, separated by slashes. 
*/ static int csv_record_path (xo_handle_t *xop, csv_private_t *csv, const char *path_raw) { int count; char *cp, *ep, *np; ssize_t len = strlen(path_raw); char *path_buf = xo_realloc(NULL, len + 1); memcpy(path_buf, path_raw, len + 1); for (cp = path_buf, ep = path_buf + len, count = 2; cp && cp < ep; cp = np) { np = strchr(cp, '/'); if (np) { np += 1; count += 1; } } path_frame_t *path = xo_realloc(NULL, sizeof(path[0]) * count); if (path == NULL) { xo_failure(xop, "allocation failure for path '%s'", path_buf); return -1; } bzero(path, sizeof(path[0]) * count); for (count = 0, cp = path_buf; cp && cp < ep; cp = np) { path[count++].pf_name = cp; np = strchr(cp, '/'); if (np) *np++ = '\0'; csv_dbg(xop, csv, "path: [%s]\n", cp); } path[count].pf_name = NULL; if (csv->c_path) /* In case two paths are given */ xo_free(csv->c_path); if (csv->c_path_buf) /* In case two paths are given */ xo_free(csv->c_path_buf); csv->c_path_buf = path_buf; csv->c_path = path; csv->c_path_max = count; csv->c_path_cur = 0; return 0; } /* * Extract the option values. The format is: - * -libxo encoder=csv:kw=val+kw=val+kw=val,pretty,etc + * -libxo encoder=csv:kw=val:kw=val:kw=val,pretty + * -libxo encoder=csv+kw=val+kw=val+kw=val,pretty */ static int -csv_options (xo_handle_t *xop, csv_private_t *csv, const char *raw_opts) +csv_options (xo_handle_t *xop, csv_private_t *csv, + const char *raw_opts, char opts_char) { ssize_t len = strlen(raw_opts); char *options = alloca(len + 1); memcpy(options, raw_opts, len); options[len] = '\0'; char *cp, *ep, *np, *vp; for (cp = options, ep = options + len + 1; cp && cp < ep; cp = np) { - np = strchr(cp, '+'); + np = strchr(cp, opts_char); if (np) *np++ = '\0'; vp = strchr(cp, '='); if (vp) *vp++ = '\0'; if (xo_streq(cp, "path")) { /* Record the path */ if (vp != NULL && csv_record_path(xop, csv, vp)) return -1; csv->c_flags |= CF_HAS_PATH; /* Yup, we have an explicit path now */ } else if (xo_streq(cp, "leafs") || xo_streq(cp, "leaf") || xo_streq(cp, "leaves")) { /* Record the leafs */ if (vp != NULL && csv_record_leafs(xop, csv, vp)) return -1; } else if (xo_streq(cp, "no-keys")) { csv->c_flags |= CF_NO_KEYS; } else if (xo_streq(cp, "no-header")) { csv->c_flags |= CF_NO_HEADER; } else if (xo_streq(cp, "value-only")) { csv->c_flags |= CF_VALUE_ONLY; } else if (xo_streq(cp, "dos")) { csv->c_flags |= CF_DOS_NEWLINE; } else if (xo_streq(cp, "no-quotes")) { csv->c_flags |= CF_NO_QUOTES; } else if (xo_streq(cp, "debug")) { csv->c_flags |= CF_DEBUG; } else { xo_warn_hc(xop, -1, "unknown encoder option value: '%s'", cp); return -1; } } return 0; } /* * Handler for incoming data values. We just record each leaf name and * value. The values are emittd when the instance is closed. */ static int csv_data (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED, const char *name, const char *value, xo_xof_flags_t flags) { csv_dbg(xop, csv, "data: [%s]=[%s] %llx\n", name, value, (unsigned long long) flags); if (!(csv->c_flags & CF_RECORD_DATA)) return 0; /* Find the leaf number */ int fnum = csv_leaf_num(xop, csv, name, flags); if (fnum < 0) return 0; /* Don't bother recording */ leaf_t *lp = &csv->c_leaf[fnum]; csv_leaf_set(xop, csv, lp, value); return 0; } /* * The callback from libxo, passing us operations/events as they * happen. */ static int csv_handler (XO_ENCODER_HANDLER_ARGS) { int rc = 0; csv_private_t *csv = private; xo_buffer_t *xbp = csv ? 
&csv->c_data : NULL; csv_dbg(xop, csv, "op %s: [%s] [%s]\n", xo_encoder_op_name(op), name ?: "", value ?: ""); fflush(stdout); /* If we don't have private data, we're sunk */ if (csv == NULL && op != XO_OP_CREATE) return -1; switch (op) { case XO_OP_CREATE: /* Called when the handle is init'd */ rc = csv_create(xop); break; case XO_OP_OPTIONS: - rc = csv_options(xop, csv, value); + rc = csv_options(xop, csv, value, ':'); + break; + + case XO_OP_OPTIONS_PLUS: + rc = csv_options(xop, csv, value, '+'); break; case XO_OP_OPEN_LIST: case XO_OP_CLOSE_LIST: break; /* Ignore these ops */ case XO_OP_OPEN_CONTAINER: case XO_OP_OPEN_LEAF_LIST: rc = csv_open_level(xop, csv, name, 0); break; case XO_OP_OPEN_INSTANCE: rc = csv_open_level(xop, csv, name, 1); break; case XO_OP_CLOSE_CONTAINER: case XO_OP_CLOSE_LEAF_LIST: case XO_OP_CLOSE_INSTANCE: rc = csv_close_level(xop, csv, name); break; case XO_OP_STRING: /* Quoted UTF-8 string */ case XO_OP_CONTENT: /* Other content */ rc = csv_data(xop, csv, name, value, flags); break; case XO_OP_FINISH: /* Clean up function */ break; case XO_OP_FLUSH: /* Clean up function */ rc = write(1, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp); if (rc > 0) rc = 0; xo_buf_reset(xbp); break; case XO_OP_DESTROY: /* Clean up function */ csv_destroy(xop, csv); break; case XO_OP_ATTRIBUTE: /* Attribute name/value */ break; case XO_OP_VERSION: /* Version string */ break; } return rc; } /* * Callback when our encoder is loaded. */ int xo_encoder_library_init (XO_ENCODER_INIT_ARGS) { arg->xei_handler = csv_handler; arg->xei_version = XO_ENCODER_VERSION; return 0; } Index: projects/clang1000-import/contrib/libxo/libxo/libxo.c =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/libxo.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/libxo.c (revision 357179) @@ -1,8465 +1,8518 @@ /* * Copyright (c) 2014-2019, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, July 2014 * * This is the implementation of libxo, the formatting library that * generates multiple styles of output from a single code path. * Command line utilities can have their normal text output while * automation tools can see XML or JSON output, and web tools can use * HTML output that encodes the text output annotated with additional * information. Specialized encoders can be built that allow custom * encoding including binary ones like CBOR, thrift, protobufs, etc. * * Full documentation is available in ./doc/libxo.txt or online at: * http://juniper.github.io/libxo/libxo-manual.html * * For first time readers, the core bits of code to start looking at are: * - xo_do_emit() -- parse and emit a set of fields * - xo_do_emit_fields -- the central function of the library * - xo_do_format_field() -- handles formatting a single field * - xo_transiton() -- the state machine that keeps things sane * and of course the "xo_handle_t" data structure, which carries all * configuration and state. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xo_config.h" #include "xo.h" #include "xo_encoder.h" #include "xo_buf.h" #include "xo_explicit.h" /* * We ask wcwidth() to do an impossible job, really. 
It's supposed to
 * tell us the number of columns consumed to display a unicode
 * character.  It returns that number without any sort of context, but
 * we know they are characters whose glyph differs based on placement
 * (end of word, middle of word, etc) and many that affect characters
 * previously emitted.  Without context, it can't hope to tell us.
 * But it's the only standard tool we've got, so we use it.  We would
 * use wcswidth() but it typically just loops through adding the results
 * of wcwidth() calls in an entirely unhelpful way.
 *
 * Even then, there are many poor implementations (macosx), so we have
 * to carry our own.  We could have configure.ac test this (with
 * something like 'assert(wcwidth(0x200d) == 0)'), but it would have
 * to run a binary, which breaks cross-compilation.  Hmm... I could
 * run this test at init time and make a warning for our dear user.
 *
 * Anyhow, it remains a best-effort sort of thing.  And it's all made
 * more hopeless because we assume the display code doing the rendering
 * is playing by the same rules we are.  If it displays 0x200d as a
 * square box or a funky question mark, the output will be hosed.
 */
#ifdef LIBXO_WCWIDTH
#include "xo_wcwidth.h"
#else /* LIBXO_WCWIDTH */
#define xo_wcwidth(_x) wcwidth(_x)
#endif /* LIBXO_WCWIDTH */

#ifdef HAVE_STDIO_EXT_H
#include <stdio_ext.h>
#endif /* HAVE_STDIO_EXT_H */

/*
 * humanize_number is a great function, unless you don't have it.  So
 * we carry one in our pocket.
 */
#ifdef HAVE_HUMANIZE_NUMBER
#include <libutil.h>
#define xo_humanize_number humanize_number
#else /* HAVE_HUMANIZE_NUMBER */
#include "xo_humanize.h"
#endif /* HAVE_HUMANIZE_NUMBER */

#ifdef HAVE_GETTEXT
#include <libintl.h>
#endif /* HAVE_GETTEXT */

/* Rather lame that we can't count on these... */
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

/*
 * Three styles of specifying thread-local variables are supported.
 * configure.ac has the brains to run each possibility through the
 * compiler and see what works; we are left to define the THREAD_LOCAL
 * macro to the right value.  Most toolchains (clang, gcc) use
 * "before", but some (borland) use "after" and I've heard of some
 * (ms) that use __declspec.  Any others out there?
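 *
 * For a declaration like "static THREAD_LOCAL(int) foo;", the first
 * two conventions expand roughly as (an illustrative sketch):
 *
 *     static __thread int foo;     ("before": gcc, clang)
 *     static int __thread foo;     ("after": borland-style)
 *
 * with the __declspec flavor spelled per that compiler's documentation.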
*/ #define THREAD_LOCAL_before 1 #define THREAD_LOCAL_after 2 #define THREAD_LOCAL_declspec 3 #ifndef HAVE_THREAD_LOCAL #define THREAD_LOCAL(_x) _x #elif HAVE_THREAD_LOCAL == THREAD_LOCAL_before #define THREAD_LOCAL(_x) __thread _x #elif HAVE_THREAD_LOCAL == THREAD_LOCAL_after #define THREAD_LOCAL(_x) _x __thread #elif HAVE_THREAD_LOCAL == THREAD_LOCAL_declspec #define THREAD_LOCAL(_x) __declspec(_x) #else #error unknown thread-local setting #endif /* HAVE_THREADS_H */ const char xo_version[] = LIBXO_VERSION; const char xo_version_extra[] = LIBXO_VERSION_EXTRA; static const char xo_default_format[] = "%s"; #ifndef UNUSED #define UNUSED __attribute__ ((__unused__)) #endif /* UNUSED */ #define XO_INDENT_BY 2 /* Amount to indent when pretty printing */ #define XO_DEPTH 128 /* Default stack depth */ #define XO_MAX_ANCHOR_WIDTH (8*1024) /* Anything wider is just silly */ #define XO_FAILURE_NAME "failure" /* Flags for the stack frame */ typedef unsigned xo_xsf_flags_t; /* XSF_* flags */ #define XSF_NOT_FIRST (1<<0) /* Not the first element */ #define XSF_LIST (1<<1) /* Frame is a list */ #define XSF_INSTANCE (1<<2) /* Frame is an instance */ #define XSF_DTRT (1<<3) /* Save the name for DTRT mode */ #define XSF_CONTENT (1<<4) /* Some content has been emitted */ #define XSF_EMIT (1<<5) /* Some field has been emitted */ #define XSF_EMIT_KEY (1<<6) /* A key has been emitted */ #define XSF_EMIT_LEAF_LIST (1<<7) /* A leaf-list field has been emitted */ /* These are the flags we propagate between markers and their parents */ #define XSF_MARKER_FLAGS \ (XSF_NOT_FIRST | XSF_CONTENT | XSF_EMIT | XSF_EMIT_KEY | XSF_EMIT_LEAF_LIST ) /* * Turn the transition between two states into a number suitable for * a "switch" statement. */ #define XSS_TRANSITION(_old, _new) ((_old) << 8 | (_new)) /* * xo_stack_t: As we open and close containers and levels, we * create a stack of frames to track them. This is needed for * XOF_WARN and XOF_XPATH. */ typedef struct xo_stack_s { xo_xsf_flags_t xs_flags; /* Flags for this frame */ xo_state_t xs_state; /* State for this stack frame */ char *xs_name; /* Name (for XPath value) */ char *xs_keys; /* XPath predicate for any key fields */ } xo_stack_t; /* * libxo supports colors and effects, for those who like them. * XO_COL_* ("colors") refers to fancy ansi codes, while X__EFF_* * ("effects") are bits since we need to maintain state. */ typedef uint8_t xo_color_t; #define XO_COL_DEFAULT 0 #define XO_COL_BLACK 1 #define XO_COL_RED 2 #define XO_COL_GREEN 3 #define XO_COL_YELLOW 4 #define XO_COL_BLUE 5 #define XO_COL_MAGENTA 6 #define XO_COL_CYAN 7 #define XO_COL_WHITE 8 #define XO_NUM_COLORS 9 /* * Yes, there's no blink. We're civilized. We like users. Blink * isn't something one does to someone you like. Friends don't let * friends use blink. On friends. You know what I mean. Blink is * like, well, it's like bursting into show tunes at a funeral. It's * just not done. Not something anyone wants. And on those rare * instances where it might actually be appropriate, it's still wrong, * since it's likely done by the wrong person for the wrong reason. * Just like blink. And if I implemented blink, I'd be like a funeral * director who adds "Would you like us to burst into show tunes?" on * the list of questions asked while making funeral arrangements. * It's formalizing wrongness in the wrong way. And we're just too * civilized to do that. Hhhmph! 
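 *
 * Effects are bits precisely so they can be combined; an illustrative
 * sketch, using the types and constants defined just below:
 *
 *     xo_colors_t xoc = { 0 };
 *     xoc.xoc_effects = XO_EFF_BOLD | XO_EFF_UNDERLINE;
 *     xoc.xoc_col_fg = XO_COL_RED;    /* bold red on default background */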
*/ #define XO_EFF_RESET (1<<0) #define XO_EFF_NORMAL (1<<1) #define XO_EFF_BOLD (1<<2) #define XO_EFF_UNDERLINE (1<<3) #define XO_EFF_INVERSE (1<<4) #define XO_EFF_CLEAR_BITS XO_EFF_RESET /* Reset gets reset, surprisingly */ typedef uint8_t xo_effect_t; typedef struct xo_colors_s { xo_effect_t xoc_effects; /* Current effect set */ xo_color_t xoc_col_fg; /* Foreground color */ xo_color_t xoc_col_bg; /* Background color */ } xo_colors_t; /* * xo_handle_t: this is the principle data structure for libxo. * It's used as a store for state, options, content, and all manor * of other information. */ struct xo_handle_s { xo_xof_flags_t xo_flags; /* Flags (XOF_*) from the user*/ xo_xof_flags_t xo_iflags; /* Internal flags (XOIF_*) */ xo_style_t xo_style; /* XO_STYLE_* value */ unsigned short xo_indent; /* Indent level (if pretty) */ unsigned short xo_indent_by; /* Indent amount (tab stop) */ xo_write_func_t xo_write; /* Write callback */ xo_close_func_t xo_close; /* Close callback */ xo_flush_func_t xo_flush; /* Flush callback */ xo_formatter_t xo_formatter; /* Custom formating function */ xo_checkpointer_t xo_checkpointer; /* Custom formating support function */ void *xo_opaque; /* Opaque data for write function */ xo_buffer_t xo_data; /* Output data */ xo_buffer_t xo_fmt; /* Work area for building format strings */ xo_buffer_t xo_attrs; /* Work area for building XML attributes */ xo_buffer_t xo_predicate; /* Work area for building XPath predicates */ xo_stack_t *xo_stack; /* Stack pointer */ int xo_depth; /* Depth of stack */ int xo_stack_size; /* Size of the stack */ xo_info_t *xo_info; /* Info fields for all elements */ int xo_info_count; /* Number of info entries */ va_list xo_vap; /* Variable arguments (stdargs) */ char *xo_leading_xpath; /* A leading XPath expression */ mbstate_t xo_mbstate; /* Multi-byte character conversion state */ ssize_t xo_anchor_offset; /* Start of anchored text */ ssize_t xo_anchor_columns; /* Number of columns since the start anchor */ ssize_t xo_anchor_min_width; /* Desired width of anchored text */ ssize_t xo_units_offset; /* Start of units insertion point */ ssize_t xo_columns; /* Columns emitted during this xo_emit call */ #ifndef LIBXO_TEXT_ONLY xo_color_t xo_color_map_fg[XO_NUM_COLORS]; /* Foreground color mappings */ xo_color_t xo_color_map_bg[XO_NUM_COLORS]; /* Background color mappings */ #endif /* LIBXO_TEXT_ONLY */ xo_colors_t xo_colors; /* Current color and effect values */ xo_buffer_t xo_color_buf; /* HTML: buffer of colors and effects */ char *xo_version; /* Version string */ int xo_errno; /* Saved errno for "%m" */ char *xo_gt_domain; /* Gettext domain, suitable for dgettext(3) */ xo_encoder_func_t xo_encoder; /* Encoding function */ void *xo_private; /* Private data for external encoders */ }; /* Flag operations */ #define XOF_BIT_ISSET(_flag, _bit) (((_flag) & (_bit)) ? 1 : 0) #define XOF_BIT_SET(_flag, _bit) do { (_flag) |= (_bit); } while (0) #define XOF_BIT_CLEAR(_flag, _bit) do { (_flag) &= ~(_bit); } while (0) #define XOF_ISSET(_xop, _bit) XOF_BIT_ISSET(_xop->xo_flags, _bit) #define XOF_SET(_xop, _bit) XOF_BIT_SET(_xop->xo_flags, _bit) #define XOF_CLEAR(_xop, _bit) XOF_BIT_CLEAR(_xop->xo_flags, _bit) #define XOIF_ISSET(_xop, _bit) XOF_BIT_ISSET(_xop->xo_iflags, _bit) #define XOIF_SET(_xop, _bit) XOF_BIT_SET(_xop->xo_iflags, _bit) #define XOIF_CLEAR(_xop, _bit) XOF_BIT_CLEAR(_xop->xo_iflags, _bit) /* Internal flags */ #define XOIF_REORDER XOF_BIT(0) /* Reordering fields; record field info */ #define XOIF_DIV_OPEN XOF_BIT(1) /* A
<div> is open */
#define XOIF_TOP_EMITTED XOF_BIT(2) /* The top JSON braces have been emitted */
#define XOIF_ANCHOR XOF_BIT(3)	/* An anchor is in place */
#define XOIF_UNITS_PENDING XOF_BIT(4) /* We have a units-insertion pending */
#define XOIF_INIT_IN_PROGRESS XOF_BIT(5) /* Init of handle is in progress */
#define XOIF_MADE_OUTPUT XOF_BIT(6) /* Have already made output */

/*
 * Normal printf has width and precision, which for strings operate as
 * min and max number of columns.  But this depends on the idea that
 * one byte means one column, which UTF-8 and multi-byte characters
 * pitches on its ear.  It may take 40 bytes of data to populate 14
 * columns, but we can't go off looking at 40 bytes of data without the
 * caller's permission for fear/knowledge that we'll generate core files.
 *
 * So we make three values, distinguishing between "max column" and
 * "number of bytes that we will inspect safely".  We call the
 * latter "size", and make the format "%[[<min>].[[<size>].<max>]]s".
 *
 * Under the "first do no harm" theory, we default "max" to "size".
 * This is a reasonable assumption for folks that don't grok the
 * MBS/WCS/UTF-8 world, and while it will be annoying, it will never
 * be evil.
 *
 * For example, xo_emit("{:tag/%-14.14s}", buf) will make 14
 * columns of output, but will never look at more than 14 bytes of the
 * input buffer.  This is mostly compatible with printf and caller's
 * expectations.
 *
 * In contrast xo_emit("{:tag/%-14..14s}", buf) will look at however
 * many bytes (or until a NUL is seen) are needed to fill 14 columns
 * of output.  xo_emit("{:tag/%-14.*.14s}", xx, buf) will look at up
 * to xx bytes (or until a NUL is seen) in order to fill 14 columns
 * of output.
 *
 * It's fairly amazing how a good idea (handle all languages of the
 * world) blows such a big hole in the bottom of the fairly weak boat
 * that is C string handling.  The simplicity and completeness are
 * sunk in ways we haven't even begun to understand.
 */
#define XF_WIDTH_MIN 0		/* Minimal width */
#define XF_WIDTH_SIZE 1		/* Maximum number of bytes to examine */
#define XF_WIDTH_MAX 2		/* Maximum width */
#define XF_WIDTH_NUM 3		/* Numeric fields in printf (min.size.max) */

/* Input and output string encodings */
#define XF_ENC_WIDE 1		/* Wide characters (wchar_t) */
#define XF_ENC_UTF8 2		/* UTF-8 */
#define XF_ENC_LOCALE 3		/* Current locale */

/*
 * A place to parse printf-style format flags for each field
 */
typedef struct xo_format_s {
    unsigned char xf_fc;	/* Format character */
    unsigned char xf_enc;	/* Encoding of the string (XF_ENC_*) */
    unsigned char xf_skip;	/* Skip this field */
    unsigned char xf_lflag;	/* 'l' (long) */
    unsigned char xf_hflag;	/* 'h' (half) */
    unsigned char xf_jflag;	/* 'j' (intmax_t) */
    unsigned char xf_tflag;	/* 't' (ptrdiff_t) */
    unsigned char xf_zflag;	/* 'z' (size_t) */
    unsigned char xf_qflag;	/* 'q' (quad_t) */
    unsigned char xf_seen_minus; /* Seen a minus */
    int xf_leading_zero;	/* Seen a leading zero (zero fill) */
    unsigned xf_dots;		/* Seen one or more '.'s */
    int xf_width[XF_WIDTH_NUM];	/* Width/precision/size numeric fields */
    unsigned xf_stars;		/* Seen one or more '*'s */
    unsigned char xf_star[XF_WIDTH_NUM]; /* Seen one or more '*'s */
} xo_format_t;

/*
 * This structure represents the parsed field information, suitable for
 * processing by xo_do_emit and anything else that needs to parse fields.
 * Note that all pointers point to the main format string.
 *
 * XXX This is a first step toward compilable or cachable format
 * strings.
We can also cache the results of dgettext when no format
 * is used, assuming the 'p' modifier has _not_ been set.
 */
typedef struct xo_field_info_s {
    xo_xff_flags_t xfi_flags;	/* Flags for this field */
    unsigned xfi_ftype;		/* Field type, as character (e.g. 'V') */
    const char *xfi_start;	/* Start of field in the format string */
    const char *xfi_content;	/* Field's content */
    const char *xfi_format;	/* Field's format */
    const char *xfi_encoding;	/* Field's encoding format */
    const char *xfi_next;	/* Next character in format string */
    ssize_t xfi_len;		/* Length of field */
    ssize_t xfi_clen;		/* Content length */
    ssize_t xfi_flen;		/* Format length */
    ssize_t xfi_elen;		/* Encoding length */
    unsigned xfi_fnum;		/* Field number (if used; 0 otherwise) */
    unsigned xfi_renum;		/* Reordered number (0 == no renumbering) */
} xo_field_info_t;

/*
 * We keep a 'default' handle to allow callers to avoid having to
 * allocate one.  Passing NULL to any of our functions will use
 * this default handle.  Most functions have a variant that doesn't
 * require a handle at all, since most output is to stdout, which
 * the default handle handles handily.
 */
static THREAD_LOCAL(xo_handle_t) xo_default_handle;
static THREAD_LOCAL(int) xo_default_inited;
static int xo_locale_inited;
static const char *xo_program;

/*
 * To allow libxo to be used in diverse environments, we allow the
 * caller to give callbacks for memory allocation.
 */
xo_realloc_func_t xo_realloc = realloc;
xo_free_func_t xo_free = free;

/* Forward declarations */
static ssize_t
xo_transition (xo_handle_t *xop, xo_xof_flags_t flags, const char *name,
	       xo_state_t new_state);

static int
xo_set_options_simple (xo_handle_t *xop, const char *input);

static int
xo_color_find (const char *str);

static void
xo_buf_append_div (xo_handle_t *xop, const char *class, xo_xff_flags_t flags,
		   const char *name, ssize_t nlen,
		   const char *value, ssize_t vlen,
		   const char *fmt, ssize_t flen,
		   const char *encoding, ssize_t elen);

static void
xo_anchor_clear (xo_handle_t *xop);

/*
 * xo_style is used to retrieve the current style.  When we're built
 * for "text only" mode, we use this function to drive the removal
 * of most of the code in libxo.  We return a constant and the compiler
 * happily removes the non-text code that is no longer executed.  This
 * trims our code nicely without needing to trample perfectly readable
 * code with ifdefs.
 */
static inline xo_style_t
xo_style (xo_handle_t *xop UNUSED)
{
#ifdef LIBXO_TEXT_ONLY
    return XO_STYLE_TEXT;
#else /* LIBXO_TEXT_ONLY */
    return xop->xo_style;
#endif /* LIBXO_TEXT_ONLY */
}

/*
 * Allow the compiler to optimize out non-text-only code while
 * still compiling it.
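 *
 * For example (an illustrative sketch), callers can guard
 * style-specific work with:
 *
 *     if (!xo_text_only())
 *         ... non-text handling ...
 *
 * and a LIBXO_TEXT_ONLY build folds the test to a constant, letting
 * the compiler drop the guarded code entirely.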
*/ static inline int xo_text_only (void) { #ifdef LIBXO_TEXT_ONLY return TRUE; #else /* LIBXO_TEXT_ONLY */ return FALSE; #endif /* LIBXO_TEXT_ONLY */ } /* * Callback to write data to a FILE pointer */ static xo_ssize_t xo_write_to_file (void *opaque, const char *data) { FILE *fp = (FILE *) opaque; return fprintf(fp, "%s", data); } /* * Callback to close a file */ static void xo_close_file (void *opaque) { FILE *fp = (FILE *) opaque; fclose(fp); } /* * Callback to flush a FILE pointer */ static int xo_flush_file (void *opaque) { FILE *fp = (FILE *) opaque; return fflush(fp); } /* * Use a rotating stock of buffers to make a printable string */ #define XO_NUMBUFS 8 #define XO_SMBUFSZ 128 static const char * xo_printable (const char *str) { static THREAD_LOCAL(char) bufset[XO_NUMBUFS][XO_SMBUFSZ]; static THREAD_LOCAL(int) bufnum = 0; if (str == NULL) return ""; if (++bufnum == XO_NUMBUFS) bufnum = 0; char *res = bufset[bufnum], *cp, *ep; for (cp = res, ep = res + XO_SMBUFSZ - 1; *str && cp < ep; cp++, str++) { if (*str == '\n') { *cp++ = '\\'; *cp = 'n'; } else if (*str == '\r') { *cp++ = '\\'; *cp = 'r'; } else if (*str == '\"') { *cp++ = '\\'; *cp = '"'; } else *cp = *str; } *cp = '\0'; return res; } static int xo_depth_check (xo_handle_t *xop, int depth) { xo_stack_t *xsp; if (depth >= xop->xo_stack_size) { depth += XO_DEPTH; /* Extra room */ xsp = xo_realloc(xop->xo_stack, sizeof(xop->xo_stack[0]) * depth); if (xsp == NULL) { xo_failure(xop, "xo_depth_check: out of memory (%d)", depth); return -1; } int count = depth - xop->xo_stack_size; bzero(xsp + xop->xo_stack_size, count * sizeof(*xsp)); xop->xo_stack_size = depth; xop->xo_stack = xsp; } return 0; } void xo_no_setlocale (void) { xo_locale_inited = 1; /* Skip initialization */ } /* * For XML, the first character of a tag cannot be numeric, but people * will likely not notice. So we people-proof them by forcing a leading * underscore if they use invalid tags. Note that this doesn't cover * all broken tags, just this fairly specific case. */ static const char * xo_xml_leader_len (xo_handle_t *xop, const char *name, xo_ssize_t nlen) { if (name == NULL || isalpha(name[0]) || name[0] == '_') return ""; xo_failure(xop, "invalid XML tag name: '%.*s'", nlen, name); return "_"; } static const char * xo_xml_leader (xo_handle_t *xop, const char *name) { return xo_xml_leader_len(xop, name, strlen(name)); } /* * We need to decide if stdout is line buffered (_IOLBF). Lacking a * standard way to decide this (e.g. getlinebuf()), we have configure * look to find __flbf, which glibc supported. If not, we'll rely on * isatty, with the assumption that terminals are the only thing * that's line buffered. We _could_ test for "steam._flags & _IOLBF", * which is all __flbf does, but that's even tackier. Like a * bedazzled Elvis outfit on an ugly lap dog sort of tacky. Not * something we're willing to do. */ static int xo_is_line_buffered (FILE *stream) { #if HAVE___FLBF if (__flbf(stream)) return 1; #else /* HAVE___FLBF */ if (isatty(fileno(stream))) return 1; #endif /* HAVE___FLBF */ return 0; } /* * Initialize an xo_handle_t, using both static defaults and * the global settings from the LIBXO_OPTIONS environment * variable. */ static void xo_init_handle (xo_handle_t *xop) { xop->xo_opaque = stdout; xop->xo_write = xo_write_to_file; xop->xo_flush = xo_flush_file; if (xo_is_line_buffered(stdout)) XOF_SET(xop, XOF_FLUSH_LINE); /* * We need to initialize the locale, which isn't really pretty. 
* Libraries should depend on their caller to set up the * environment. But we really can't count on the caller to do * this, because well, they won't. Trust me. */ if (!xo_locale_inited) { xo_locale_inited = 1; /* Only do this once */ #ifdef __FreeBSD__ /* Who does The Right Thing */ const char *cp = ""; #else /* __FreeBSD__ */ const char *cp = getenv("LC_ALL"); if (cp == NULL) cp = getenv("LC_CTYPE"); if (cp == NULL) cp = getenv("LANG"); if (cp == NULL) cp = "C"; /* Default for C programs */ #endif /* __FreeBSD__ */ (void) setlocale(LC_CTYPE, cp); } /* * Initialize only the xo_buffers we know we'll need; the others * can be allocated as needed. */ xo_buf_init(&xop->xo_data); xo_buf_init(&xop->xo_fmt); if (XOIF_ISSET(xop, XOIF_INIT_IN_PROGRESS)) return; XOIF_SET(xop, XOIF_INIT_IN_PROGRESS); xop->xo_indent_by = XO_INDENT_BY; xo_depth_check(xop, XO_DEPTH); XOIF_CLEAR(xop, XOIF_INIT_IN_PROGRESS); } /* * Initialize the default handle. */ static void xo_default_init (void) { xo_handle_t *xop = &xo_default_handle; xo_init_handle(xop); #if !defined(NO_LIBXO_OPTIONS) if (!XOF_ISSET(xop, XOF_NO_ENV)) { char *env = getenv("LIBXO_OPTIONS"); if (env) xo_set_options_simple(xop, env); } #endif /* NO_LIBXO_OPTIONS */ xo_default_inited = 1; } /* * Cheap convenience function to return either the argument, or * the internal handle, after it has been initialized. The usage * is: * xop = xo_default(xop); */ static xo_handle_t * xo_default (xo_handle_t *xop) { if (xop == NULL) { if (xo_default_inited == 0) xo_default_init(); xop = &xo_default_handle; } return xop; } /* * Return the number of spaces we should be indenting. If * we are pretty-printing, this is indent * indent_by. */ static int xo_indent (xo_handle_t *xop) { int rc = 0; xop = xo_default(xop); if (XOF_ISSET(xop, XOF_PRETTY)) { rc = xop->xo_indent * xop->xo_indent_by; if (XOIF_ISSET(xop, XOIF_TOP_EMITTED)) rc += xop->xo_indent_by; } return (rc > 0) ? 
rc : 0; } static void xo_buf_indent (xo_handle_t *xop, int indent) { xo_buffer_t *xbp = &xop->xo_data; if (indent <= 0) indent = xo_indent(xop); if (!xo_buf_has_room(xbp, indent)) return; memset(xbp->xb_curp, ' ', indent); xbp->xb_curp += indent; } static char xo_xml_amp[] = "&"; static char xo_xml_lt[] = "<"; static char xo_xml_gt[] = ">"; static char xo_xml_quot[] = """; static ssize_t xo_escape_xml (xo_buffer_t *xbp, ssize_t len, xo_xff_flags_t flags) { ssize_t slen; ssize_t delta = 0; char *cp, *ep, *ip; const char *sp; int attr = XOF_BIT_ISSET(flags, XFF_ATTR); for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { /* We're subtracting 2: 1 for the NUL, 1 for the char we replace */ if (*cp == '<') delta += sizeof(xo_xml_lt) - 2; else if (*cp == '>') delta += sizeof(xo_xml_gt) - 2; else if (*cp == '&') delta += sizeof(xo_xml_amp) - 2; else if (attr && *cp == '"') delta += sizeof(xo_xml_quot) - 2; } if (delta == 0) /* Nothing to escape; bail */ return len; if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */ return 0; ep = xbp->xb_curp; cp = ep + len; ip = cp + delta; do { cp -= 1; ip -= 1; if (*cp == '<') sp = xo_xml_lt; else if (*cp == '>') sp = xo_xml_gt; else if (*cp == '&') sp = xo_xml_amp; else if (attr && *cp == '"') sp = xo_xml_quot; else { *ip = *cp; continue; } slen = strlen(sp); ip -= slen - 1; memcpy(ip, sp, slen); } while (cp > ep && cp != ip); return len + delta; } static ssize_t xo_escape_json (xo_buffer_t *xbp, ssize_t len, xo_xff_flags_t flags UNUSED) { ssize_t delta = 0; char *cp, *ep, *ip; for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { if (*cp == '\\' || *cp == '"') delta += 1; else if (*cp == '\n' || *cp == '\r') delta += 1; } if (delta == 0) /* Nothing to escape; bail */ return len; if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */ return 0; ep = xbp->xb_curp; cp = ep + len; ip = cp + delta; do { cp -= 1; ip -= 1; if (*cp == '\\' || *cp == '"') { *ip-- = *cp; *ip = '\\'; } else if (*cp == '\n') { *ip-- = 'n'; *ip = '\\'; } else if (*cp == '\r') { *ip-- = 'r'; *ip = '\\'; } else { *ip = *cp; } } while (cp > ep && cp != ip); return len + delta; } /* * PARAM-VALUE = UTF-8-STRING ; characters '"', '\' and * ; ']' MUST be escaped. */ static ssize_t xo_escape_sdparams (xo_buffer_t *xbp, ssize_t len, xo_xff_flags_t flags UNUSED) { ssize_t delta = 0; char *cp, *ep, *ip; for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { if (*cp == '\\' || *cp == '"' || *cp == ']') delta += 1; } if (delta == 0) /* Nothing to escape; bail */ return len; if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */ return 0; ep = xbp->xb_curp; cp = ep + len; ip = cp + delta; do { cp -= 1; ip -= 1; if (*cp == '\\' || *cp == '"' || *cp == ']') { *ip-- = *cp; *ip = '\\'; } else { *ip = *cp; } } while (cp > ep && cp != ip); return len + delta; } static void xo_buf_escape (xo_handle_t *xop, xo_buffer_t *xbp, const char *str, ssize_t len, xo_xff_flags_t flags) { if (!xo_buf_has_room(xbp, len)) return; memcpy(xbp->xb_curp, str, len); switch (xo_style(xop)) { case XO_STYLE_XML: case XO_STYLE_HTML: len = xo_escape_xml(xbp, len, flags); break; case XO_STYLE_JSON: len = xo_escape_json(xbp, len, flags); break; case XO_STYLE_SDPARAMS: len = xo_escape_sdparams(xbp, len, flags); break; } xbp->xb_curp += len; } /* * Write the current contents of the data buffer using the handle's * xo_write function. 
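 *
 * A custom writer has the same shape as xo_write_to_file() above; an
 * illustrative sketch ("my_stderr_write" is hypothetical):
 *
 *     static xo_ssize_t
 *     my_stderr_write (void *opaque UNUSED, const char *data)
 *     {
 *         return fprintf(stderr, "%s", data);
 *     }
 *
 * installed via xo_set_writer(xop, NULL, my_stderr_write, NULL, NULL).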
*/ static ssize_t xo_write (xo_handle_t *xop) { ssize_t rc = 0; xo_buffer_t *xbp = &xop->xo_data; if (xbp->xb_curp != xbp->xb_bufp) { xo_buf_append(xbp, "", 1); /* Append ending NUL */ xo_anchor_clear(xop); if (xop->xo_write) rc = xop->xo_write(xop->xo_opaque, xbp->xb_bufp); xbp->xb_curp = xbp->xb_bufp; } /* Turn off the flags that don't survive across writes */ XOIF_CLEAR(xop, XOIF_UNITS_PENDING); return rc; } /* * Format arguments into our buffer. If a custom formatter has been set, * we use that to do the work; otherwise we vsnprintf(). */ static ssize_t xo_vsnprintf (xo_handle_t *xop, xo_buffer_t *xbp, const char *fmt, va_list vap) { va_list va_local; ssize_t rc; ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); va_copy(va_local, vap); if (xop->xo_formatter) rc = xop->xo_formatter(xop, xbp->xb_curp, left, fmt, va_local); else rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); if (rc >= left) { if (!xo_buf_has_room(xbp, rc)) { va_end(va_local); return -1; } /* * After we call vsnprintf(), the stage of vap is not defined. * We need to copy it before we pass. Then we have to do our * own logic below to move it along. This is because the * implementation can have va_list be a pointer (bsd) or a * structure (macosx) or anything in between. */ va_end(va_local); /* Reset vap to the start */ va_copy(va_local, vap); left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); if (xop->xo_formatter) rc = xop->xo_formatter(xop, xbp->xb_curp, left, fmt, va_local); else rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); } va_end(va_local); return rc; } /* * Print some data through the handle. */ static ssize_t xo_printf_v (xo_handle_t *xop, const char *fmt, va_list vap) { xo_buffer_t *xbp = &xop->xo_data; ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); ssize_t rc; va_list va_local; va_copy(va_local, vap); rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); if (rc >= left) { if (!xo_buf_has_room(xbp, rc)) { va_end(va_local); return -1; } va_end(va_local); /* Reset vap to the start */ va_copy(va_local, vap); left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); } va_end(va_local); if (rc > 0) xbp->xb_curp += rc; return rc; } static ssize_t xo_printf (xo_handle_t *xop, const char *fmt, ...) { ssize_t rc; va_list vap; va_start(vap, fmt); rc = xo_printf_v(xop, fmt, vap); va_end(vap); return rc; } /* * These next few function are make The Essential UTF-8 Ginsu Knife. * Identify an input and output character, and convert it. */ static uint8_t xo_utf8_data_bits[5] = { 0, 0x7f, 0x1f, 0x0f, 0x07 }; static uint8_t xo_utf8_len_bits[5] = { 0, 0x00, 0xc0, 0xe0, 0xf0 }; /* * If the byte has a high-bit set, it's UTF-8, not ASCII. */ static int xo_is_utf8 (char ch) { return (ch & 0x80); } /* * Look at the high bits of the first byte to determine the length * of the UTF-8 character. 
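 *
 * For example, reading the leading bits of the first byte:
 *
 *     0x41 ('A') -> 0xxxxxxx -> 1 byte
 *     0xC3       -> 110xxxxx -> 2 bytes
 *     0xE2       -> 1110xxxx -> 3 bytes
 *     0xF0       -> 11110xxx -> 4 bytes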
*/ static inline ssize_t xo_utf8_to_wc_len (const char *buf) { uint8_t bval = (uint8_t) *buf; ssize_t len; if ((bval & 0x80) == 0x0) len = 1; else if ((bval & 0xe0) == 0xc0) len = 2; else if ((bval & 0xf0) == 0xe0) len = 3; else if ((bval & 0xf8) == 0xf0) len = 4; else len = -1; return len; } static ssize_t xo_buf_utf8_len (xo_handle_t *xop, const char *buf, ssize_t bufsiz) { unsigned b = (unsigned char) *buf; ssize_t len, i; len = xo_utf8_to_wc_len(buf); if (len < 0) { xo_failure(xop, "invalid UTF-8 data: %02hhx", b); return -1; } if (len > bufsiz) { xo_failure(xop, "invalid UTF-8 data (short): %02hhx (%d/%d)", b, len, bufsiz); return -1; } for (i = 2; i < len; i++) { b = (unsigned char ) buf[i]; if ((b & 0xc0) != 0x80) { xo_failure(xop, "invalid UTF-8 data (byte %d): %x", i, b); return -1; } } return len; } /* * Build a wide character from the input buffer; the number of * bits we pull off the first character is dependent on the length, * but we put 6 bits off all other bytes. */ static inline wchar_t xo_utf8_char (const char *buf, ssize_t len) { /* Most common case: singleton byte */ if (len == 1) return (unsigned char) buf[0]; ssize_t i; wchar_t wc; const unsigned char *cp = (const unsigned char *) buf; wc = *cp & xo_utf8_data_bits[len]; for (i = 1; i < len; i++) { wc <<= 6; /* Low six bits have data */ wc |= cp[i] & 0x3f; if ((cp[i] & 0xc0) != 0x80) return (wchar_t) -1; } return wc; } /* * Determine the number of bytes needed to encode a wide character. */ static ssize_t xo_utf8_emit_len (wchar_t wc) { ssize_t len; if ((wc & ((1 << 7) - 1)) == wc) /* Simple case */ len = 1; else if ((wc & ((1 << 11) - 1)) == wc) len = 2; else if ((wc & ((1 << 16) - 1)) == wc) len = 3; else if ((wc & ((1 << 21) - 1)) == wc) len = 4; else len = -1; /* Invalid */ return len; } /* * Emit one wide character into the given buffer */ static void xo_utf8_emit_char (char *buf, ssize_t len, wchar_t wc) { ssize_t i; if (len == 1) { /* Simple case */ buf[0] = wc & 0x7f; return; } /* Start with the low bits and insert them, six bits at a time */ for (i = len - 1; i >= 0; i--) { buf[i] = 0x80 | (wc & 0x3f); wc >>= 6; /* Drop the low six bits */ } /* Finish off the first byte with the length bits */ buf[0] &= xo_utf8_data_bits[len]; /* Clear out the length bits */ buf[0] |= xo_utf8_len_bits[len]; /* Drop in new length bits */ } /* * Append a single UTF-8 character to a buffer, converting it to locale * encoding. Returns the number of columns consumed by that character, * as best we can determine it. */ static ssize_t xo_buf_append_locale_from_utf8 (xo_handle_t *xop, xo_buffer_t *xbp, const char *ibuf, ssize_t ilen) { wchar_t wc; ssize_t len; /* * Build our wide character from the input buffer; the number of * bits we pull off the first character is dependent on the length, * but we put 6 bits off all other bytes. 
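 *
 * A worked example: U+00E9 (e-acute) arrives as 0xC3 0xA9; the first
 * byte contributes (0xC3 & 0x1f) = 0x03, and shifting left six bits
 * and adding (0xA9 & 0x3f) = 0x29 yields 0xE9, the code point.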
*/ wc = xo_utf8_char(ibuf, ilen); if (wc == (wchar_t) -1) { xo_failure(xop, "invalid UTF-8 byte sequence"); return 0; } if (XOF_ISSET(xop, XOF_NO_LOCALE)) { if (!xo_buf_has_room(xbp, ilen)) return 0; memcpy(xbp->xb_curp, ibuf, ilen); xbp->xb_curp += ilen; } else { if (!xo_buf_has_room(xbp, MB_LEN_MAX + 1)) return 0; bzero(&xop->xo_mbstate, sizeof(xop->xo_mbstate)); len = wcrtomb(xbp->xb_curp, wc, &xop->xo_mbstate); if (len <= 0) { xo_failure(xop, "could not convert wide char: %lx", (unsigned long) wc); return 0; } xbp->xb_curp += len; } return xo_wcwidth(wc); } /* * Append a UTF-8 string to a buffer, converting it into locale encoding */ static void xo_buf_append_locale (xo_handle_t *xop, xo_buffer_t *xbp, const char *cp, ssize_t len) { const char *sp = cp, *ep = cp + len; ssize_t save_off = xbp->xb_bufp - xbp->xb_curp; ssize_t slen; int cols = 0; for ( ; cp < ep; cp++) { if (!xo_is_utf8(*cp)) { cols += 1; continue; } /* * We're looking at a non-ascii UTF-8 character. * First we copy the previous data. * Then we need find the length and validate it. * Then we turn it into a wide string. * Then we turn it into a localized string. * Then we repeat. Isn't i18n fun? */ if (sp != cp) xo_buf_append(xbp, sp, cp - sp); /* Append previous data */ slen = xo_buf_utf8_len(xop, cp, ep - cp); if (slen <= 0) { /* Bad data; back it all out */ xbp->xb_curp = xbp->xb_bufp + save_off; return; } cols += xo_buf_append_locale_from_utf8(xop, xbp, cp, slen); /* Next time through, we'll start at the next character */ cp += slen - 1; sp = cp + 1; } /* Update column values */ if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; /* Before we fall into the basic logic below, we need reset len */ len = ep - sp; if (len != 0) /* Append trailing data */ xo_buf_append(xbp, sp, len); } /* * Append the given string to the given buffer, without escaping or * character set conversion. This is the straight copy to the data * buffer with no fanciness. */ static void xo_data_append (xo_handle_t *xop, const char *str, ssize_t len) { xo_buf_append(&xop->xo_data, str, len); } /* * Append the given string to the given buffer */ static void xo_data_escape (xo_handle_t *xop, const char *str, ssize_t len) { xo_buf_escape(xop, &xop->xo_data, str, len, 0); } #ifdef LIBXO_NO_RETAIN /* * Empty implementations of the retain logic */ void xo_retain_clear_all (void) { return; } void xo_retain_clear (const char *fmt UNUSED) { return; } static void xo_retain_add (const char *fmt UNUSED, xo_field_info_t *fields UNUSED, unsigned num_fields UNUSED) { return; } static int xo_retain_find (const char *fmt UNUSED, xo_field_info_t **valp UNUSED, unsigned *nump UNUSED) { return -1; } #else /* !LIBXO_NO_RETAIN */ /* * Retain: We retain parsed field definitions to enhance performance, * especially inside loops. We depend on the caller treating the format * strings as immutable, so that we can retain pointers into them. We * hold the pointers in a hash table, so allow quick access. Retained * information is retained until xo_retain_clear is called. */ /* * xo_retain_entry_t holds information about one retained set of * parsed fields. 
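 *
 * The payoff is in loops; an illustrative sketch:
 *
 *     for (i = 0; i < count; i++)
 *         xo_emit("{:name} {:value/%d}\n", name[i], value[i]);
 *
 * With retention enabled, the format is parsed once and the parsed
 * field array is reused on each subsequent call.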
*/
typedef struct xo_retain_entry_s {
    struct xo_retain_entry_s *xre_next; /* Pointer to next (older) entry */
    unsigned long xre_hits;	/* Number of times we've hit */
    const char *xre_format;	/* Pointer to format string */
    unsigned xre_num_fields;	/* Number of fields saved */
    xo_field_info_t *xre_fields; /* Pointer to fields */
} xo_retain_entry_t;

/*
 * xo_retain_t holds a complete set of parsed fields as a hash table.
 */
#ifndef XO_RETAIN_SIZE
#define XO_RETAIN_SIZE 6
#endif /* XO_RETAIN_SIZE */
#define RETAIN_HASH_SIZE (1 << XO_RETAIN_SIZE)

typedef struct xo_retain_s {
    xo_retain_entry_t *xr_bucket[RETAIN_HASH_SIZE];
} xo_retain_t;

static THREAD_LOCAL(xo_retain_t) xo_retain;
static THREAD_LOCAL(unsigned) xo_retain_count;

/*
 * Hash the format string's address into a bucket index; the low four
 * bits and the high bits aren't random, so they are discarded.
 */
static unsigned
xo_retain_hash (const char *fmt)
{
    volatile uintptr_t iptr = (uintptr_t) (const void *) fmt;

    /* Discard low four bits and high bits; they aren't random */
    unsigned val = (unsigned) ((iptr >> 4) & (((1 << 24) - 1)));

    val = (val ^ 61) ^ (val >> 16);
    val = val + (val << 3);
    val = val ^ (val >> 4);
    val = val * 0x3a8f05c5;	/* My large prime number */
    val = val ^ (val >> 15);
    val &= RETAIN_HASH_SIZE - 1;

    return val;
}

/*
 * Walk all buckets, clearing all retained entries
 */
void
xo_retain_clear_all (void)
{
    int i;
    xo_retain_entry_t *xrep, *next;

    for (i = 0; i < RETAIN_HASH_SIZE; i++) {
	for (xrep = xo_retain.xr_bucket[i]; xrep; xrep = next) {
	    next = xrep->xre_next;
	    xo_free(xrep);
	}
	xo_retain.xr_bucket[i] = NULL;
    }
    xo_retain_count = 0;
}

/*
 * Clear the single retained entry for the given format string
 */
void
xo_retain_clear (const char *fmt)
{
    xo_retain_entry_t **xrepp;
    unsigned hash = xo_retain_hash(fmt);

    for (xrepp = &xo_retain.xr_bucket[hash]; *xrepp;
	 xrepp = &(*xrepp)->xre_next) {
	if ((*xrepp)->xre_format == fmt) {
	    *xrepp = (*xrepp)->xre_next;
	    xo_retain_count -= 1;
	    return;
	}
    }
}

/*
 * Search the hash for an entry matching 'fmt'; return its fields.
 */
static int
xo_retain_find (const char *fmt, xo_field_info_t **valp, unsigned *nump)
{
    if (xo_retain_count == 0)
	return -1;

    unsigned hash = xo_retain_hash(fmt);
    xo_retain_entry_t *xrep;

    for (xrep = xo_retain.xr_bucket[hash]; xrep != NULL;
	 xrep = xrep->xre_next) {
	if (xrep->xre_format == fmt) {
	    *valp = xrep->xre_fields;
	    *nump = xrep->xre_num_fields;
	    xrep->xre_hits += 1;
	    return 0;
	}
    }

    return -1;
}

static void
xo_retain_add (const char *fmt, xo_field_info_t *fields, unsigned num_fields)
{
    unsigned hash = xo_retain_hash(fmt);
    xo_retain_entry_t *xrep;
    ssize_t sz = sizeof(*xrep) + (num_fields + 1) * sizeof(*fields);
    xo_field_info_t *xfip;

    xrep = xo_realloc(NULL, sz);
    if (xrep == NULL)
	return;

    xfip = (xo_field_info_t *) &xrep[1];
    memcpy(xfip, fields, num_fields * sizeof(*fields));

    bzero(xrep, sizeof(*xrep));
    xrep->xre_format = fmt;
    xrep->xre_fields = xfip;
    xrep->xre_num_fields = num_fields;

    /* Record the field info in the retain bucket */
    xrep->xre_next = xo_retain.xr_bucket[hash];
    xo_retain.xr_bucket[hash] = xrep;
    xo_retain_count += 1;
}

#endif /* !LIBXO_NO_RETAIN */

/*
 * Generate a warning.  Normally, this is a text message written to
 * standard error.  If the XOF_WARN_XML flag is set, then we generate
 * XMLified content on standard output.
 */
static void
xo_warn_hcv (xo_handle_t *xop, int code, int check_warn,
	     const char *fmt, va_list vap)
{
    xop = xo_default(xop);
    if (check_warn && !XOF_ISSET(xop, XOF_WARN))
	return;

    if (fmt == NULL)
	return;

    ssize_t len = strlen(fmt);
    ssize_t plen = xo_program ?
strlen(xo_program) : 0;
    char *newfmt = alloca(len + 1 + plen + 2); /* NUL, and ": " */

    if (plen) {
	memcpy(newfmt, xo_program, plen);
	newfmt[plen++] = ':';
	newfmt[plen++] = ' ';
    }
    memcpy(newfmt + plen, fmt, len);
    newfmt[len + plen] = '\0';

    if (XOF_ISSET(xop, XOF_WARN_XML)) {
	static char err_open[] = "<error>";
	static char err_close[] = "</error>";
	static char msg_open[] = "<message>";
	static char msg_close[] = "</message>";
	xo_buffer_t *xbp = &xop->xo_data;

	xo_buf_append(xbp, err_open, sizeof(err_open) - 1);
	xo_buf_append(xbp, msg_open, sizeof(msg_open) - 1);

	va_list va_local;
	va_copy(va_local, vap);

	ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	ssize_t rc = vsnprintf(xbp->xb_curp, left, newfmt, vap);
	if (rc >= left) {
	    if (!xo_buf_has_room(xbp, rc)) {
		va_end(va_local);
		return;
	    }

	    va_end(vap);	/* Reset vap to the start */
	    va_copy(vap, va_local);

	    left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	    rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	}
	va_end(va_local);

	rc = xo_escape_xml(xbp, rc, 1);
	xbp->xb_curp += rc;

	xo_buf_append(xbp, msg_close, sizeof(msg_close) - 1);
	xo_buf_append(xbp, err_close, sizeof(err_close) - 1);

	if (code >= 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_buf_append(xbp, ": ", 2);
		xo_buf_append(xbp, msg, strlen(msg));
	    }
	}

	xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */
	(void) xo_write(xop);

    } else {
	vfprintf(stderr, newfmt, vap);
	if (code >= 0) {
	    const char *msg = strerror(code);
	    if (msg)
		fprintf(stderr, ": %s", msg);
	}
	fprintf(stderr, "\n");
    }
}

void
xo_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(xop, code, 0, fmt, vap);
    va_end(vap);
}

void
xo_warn_c (int code, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
}

void
xo_warn (const char *fmt, ...)
{
    int code = errno;
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
}

void
xo_warnx (const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, -1, 0, fmt, vap);
    va_end(vap);
}

void
xo_err (int eval, const char *fmt, ...)
{
    int code = errno;
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
    xo_finish();
    exit(eval);
}

void
xo_errx (int eval, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, -1, 0, fmt, vap);
    va_end(vap);
    xo_finish();
    exit(eval);
}

void
xo_errc (int eval, int code, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
    xo_finish();
    exit(eval);
}

/*
 * Generate a message.  Unlike a warning, a message is written to the
 * handle's own output in a style-appropriate way: wrapped in markup
 * for XML and HTML, plain text for the text style, and suppressed
 * entirely for styles that cannot represent it.
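 *
 * For example (an illustrative call; "path" is hypothetical):
 *
 *     xo_message_c(ENOENT, "cannot open %s", path);
 *
 * appends ": No such file or directory" to the formatted text, since
 * the format lacks a trailing newline and the code is positive.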
*/
void
xo_message_hcv (xo_handle_t *xop, int code, const char *fmt, va_list vap)
{
    static char msg_open[] = "<message>";
    static char msg_close[] = "</message>";
    xo_buffer_t *xbp;
    ssize_t rc;
    va_list va_local;

    xop = xo_default(xop);

    if (fmt == NULL || *fmt == '\0')
	return;

    int need_nl = (fmt[strlen(fmt) - 1] != '\n');

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
	xbp = &xop->xo_data;
	if (XOF_ISSET(xop, XOF_PRETTY))
	    xo_buf_indent(xop, xop->xo_indent_by);
	xo_buf_append(xbp, msg_open, sizeof(msg_open) - 1);

	va_copy(va_local, vap);

	ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	if (rc >= left) {
	    if (!xo_buf_has_room(xbp, rc)) {
		va_end(va_local);
		return;
	    }

	    va_end(vap);	/* Reset vap to the start */
	    va_copy(vap, va_local);

	    left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	    rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	}
	va_end(va_local);

	rc = xo_escape_xml(xbp, rc, 0);
	xbp->xb_curp += rc;

	if (need_nl && code > 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_buf_append(xbp, ": ", 2);
		xo_buf_append(xbp, msg, strlen(msg));
	    }
	}

	if (need_nl)
	    xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */

	xo_buf_append(xbp, msg_close, sizeof(msg_close) - 1);
	if (XOF_ISSET(xop, XOF_PRETTY))
	    xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */
	(void) xo_write(xop);
	break;

    case XO_STYLE_HTML:
	{
	    char buf[BUFSIZ], *bp = buf, *cp;
	    ssize_t bufsiz = sizeof(buf);
	    ssize_t rc2;

	    va_copy(va_local, vap);

	    rc = vsnprintf(bp, bufsiz, fmt, va_local);
	    if (rc > bufsiz) {
		bufsiz = rc + BUFSIZ;
		bp = alloca(bufsiz);
		va_end(va_local);
		va_copy(va_local, vap);
		rc = vsnprintf(bp, bufsiz, fmt, va_local);
	    }
	    va_end(va_local);
	    cp = bp + rc;

	    if (need_nl) {
		rc2 = snprintf(cp, bufsiz - rc, "%s%s\n",
			       (code > 0) ? ": " : "",
			       (code > 0) ? strerror(code) : "");
		if (rc2 > 0)
		    rc += rc2;
	    }

	    xo_buf_append_div(xop, "message", 0, NULL, 0, bp, rc,
			      NULL, 0, NULL, 0);
	}
	break;

    case XO_STYLE_JSON:
    case XO_STYLE_SDPARAMS:
    case XO_STYLE_ENCODER:
	/* No means of representing messages */
	return;

    case XO_STYLE_TEXT:
	rc = xo_printf_v(xop, fmt, vap);
	/*
	 * XXX need to handle UTF-8 widths
	 */
	if (rc > 0) {
	    if (XOF_ISSET(xop, XOF_COLUMNS))
		xop->xo_columns += rc;
	    if (XOIF_ISSET(xop, XOIF_ANCHOR))
		xop->xo_anchor_columns += rc;
	}

	if (need_nl && code > 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_printf(xop, ": %s", msg);
	    }
	}
	if (need_nl)
	    xo_printf(xop, "\n");
	break;
    }

    switch (xo_style(xop)) {
    case XO_STYLE_HTML:
	if (XOIF_ISSET(xop, XOIF_DIV_OPEN)) {
	    static char div_close[] = "</div>
"; XOIF_CLEAR(xop, XOIF_DIV_OPEN); xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } break; } (void) xo_flush_h(xop); } void xo_message_hc (xo_handle_t *xop, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_message_hcv(xop, code, fmt, vap); va_end(vap); } void xo_message_c (int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_message_hcv(NULL, code, fmt, vap); va_end(vap); } void xo_message_e (const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_message_hcv(NULL, code, fmt, vap); va_end(vap); } void xo_message (const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_message_hcv(NULL, 0, fmt, vap); va_end(vap); } void xo_failure (xo_handle_t *xop, const char *fmt, ...) { if (!XOF_ISSET(xop, XOF_WARN)) return; va_list vap; va_start(vap, fmt); xo_warn_hcv(xop, -1, 1, fmt, vap); va_end(vap); } /** * Create a handle for use by later libxo functions. * * Note: normal use of libxo does not require a distinct handle, since * the default handle (used when NULL is passed) generates text on stdout. * * @param style Style of output desired (XO_STYLE_* value) * @param flags Set of XOF_* flags in use with this handle * @return Newly allocated handle * @see xo_destroy */ xo_handle_t * xo_create (xo_style_t style, xo_xof_flags_t flags) { xo_handle_t *xop = xo_realloc(NULL, sizeof(*xop)); if (xop) { bzero(xop, sizeof(*xop)); xop->xo_style = style; XOF_SET(xop, flags); xo_init_handle(xop); xop->xo_style = style; /* Reset style (see LIBXO_OPTIONS) */ } return xop; } /** * Create a handle that will write to the given file. Use * the XOF_CLOSE_FP flag to have the file closed on xo_destroy(). * * @param fp FILE pointer to use * @param style Style of output desired (XO_STYLE_* value) * @param flags Set of XOF_* flags to use with this handle * @return Newly allocated handle * @see xo_destroy */ xo_handle_t * xo_create_to_file (FILE *fp, xo_style_t style, xo_xof_flags_t flags) { xo_handle_t *xop = xo_create(style, flags); if (xop) { xop->xo_opaque = fp; xop->xo_write = xo_write_to_file; xop->xo_close = xo_close_file; xop->xo_flush = xo_flush_file; } return xop; } /** * Set the default handler to output to a file. * * @param xop libxo handle * @param fp FILE pointer to use * @return 0 on success, non-zero on failure */ int xo_set_file_h (xo_handle_t *xop, FILE *fp) { xop = xo_default(xop); if (fp == NULL) { xo_failure(xop, "xo_set_file: NULL fp"); return -1; } xop->xo_opaque = fp; xop->xo_write = xo_write_to_file; xop->xo_close = xo_close_file; xop->xo_flush = xo_flush_file; return 0; } /** * Set the default handler to output to a file. * * @param fp FILE pointer to use * @return 0 on success, non-zero on failure */ int xo_set_file (FILE *fp) { return xo_set_file_h(NULL, fp); } /** * Release any resources held by the handle. 
* * @param xop XO handle to alter (or NULL for default handle) */ void xo_destroy (xo_handle_t *xop_arg) { xo_handle_t *xop = xo_default(xop_arg); xo_flush_h(xop); if (xop->xo_close && XOF_ISSET(xop, XOF_CLOSE_FP)) xop->xo_close(xop->xo_opaque); xo_free(xop->xo_stack); xo_buf_cleanup(&xop->xo_data); xo_buf_cleanup(&xop->xo_fmt); xo_buf_cleanup(&xop->xo_predicate); xo_buf_cleanup(&xop->xo_attrs); xo_buf_cleanup(&xop->xo_color_buf); if (xop->xo_version) xo_free(xop->xo_version); if (xop_arg == NULL) { bzero(&xo_default_handle, sizeof(xo_default_handle)); xo_default_inited = 0; } else xo_free(xop); } /** * Record a new output style to use for the given handle (or default if * handle is NULL). This output style will be used for any future output. * * @param xop XO handle to alter (or NULL for default handle) * @param style new output style (XO_STYLE_*) */ void xo_set_style (xo_handle_t *xop, xo_style_t style) { xop = xo_default(xop); xop->xo_style = style; } /** * Return the current style of a handle * * @param xop XO handle to access * @return The handle's current style */ xo_style_t xo_get_style (xo_handle_t *xop) { xop = xo_default(xop); return xo_style(xop); } /** * Return the XO_STYLE_* value matching a given name * * @param name String name of a style * @return XO_STYLE_* value */ static int xo_name_to_style (const char *name) { if (xo_streq(name, "xml")) return XO_STYLE_XML; else if (xo_streq(name, "json")) return XO_STYLE_JSON; else if (xo_streq(name, "encoder")) return XO_STYLE_ENCODER; else if (xo_streq(name, "text")) return XO_STYLE_TEXT; else if (xo_streq(name, "html")) return XO_STYLE_HTML; else if (xo_streq(name, "sdparams")) return XO_STYLE_SDPARAMS; return -1; } /* * Indicate if the style is an "encoding" one as opposed to a "display" one. 
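 *
 * XML, JSON, SDPARAMS and external encoders count as "encoding"
 * styles; TEXT and HTML are "display" styles aimed at human readers.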
*/ static int xo_style_is_encoding (xo_handle_t *xop) { if (xo_style(xop) == XO_STYLE_JSON || xo_style(xop) == XO_STYLE_XML || xo_style(xop) == XO_STYLE_SDPARAMS || xo_style(xop) == XO_STYLE_ENCODER) return 1; return 0; } /* Simple name-value mapping */ typedef struct xo_mapping_s { xo_xff_flags_t xm_value; /* Flag value */ const char *xm_name; /* String name */ } xo_mapping_t; static xo_xff_flags_t xo_name_lookup (xo_mapping_t *map, const char *value, ssize_t len) { if (len == 0) return 0; if (len < 0) len = strlen(value); while (isspace((int) *value)) { value += 1; len -= 1; } while (isspace((int) value[len])) len -= 1; if (*value == '\0') return 0; for ( ; map->xm_name; map++) if (strncmp(map->xm_name, value, len) == 0) return map->xm_value; return 0; } #ifdef NOT_NEEDED_YET static const char * xo_value_lookup (xo_mapping_t *map, xo_xff_flags_t value) { if (value == 0) return NULL; for ( ; map->xm_name; map++) if (map->xm_value == value) return map->xm_name; return NULL; } #endif /* NOT_NEEDED_YET */ static xo_mapping_t xo_xof_names[] = { { XOF_COLOR_ALLOWED, "color" }, { XOF_COLOR, "color-force" }, { XOF_COLUMNS, "columns" }, { XOF_DTRT, "dtrt" }, { XOF_FLUSH, "flush" }, { XOF_FLUSH_LINE, "flush-line" }, { XOF_IGNORE_CLOSE, "ignore-close" }, { XOF_INFO, "info" }, { XOF_KEYS, "keys" }, { XOF_LOG_GETTEXT, "log-gettext" }, { XOF_LOG_SYSLOG, "log-syslog" }, { XOF_NO_HUMANIZE, "no-humanize" }, { XOF_NO_LOCALE, "no-locale" }, { XOF_RETAIN_NONE, "no-retain" }, { XOF_NO_TOP, "no-top" }, { XOF_NOT_FIRST, "not-first" }, { XOF_PRETTY, "pretty" }, { XOF_RETAIN_ALL, "retain" }, { XOF_UNDERSCORES, "underscores" }, { XOF_UNITS, "units" }, { XOF_WARN, "warn" }, { XOF_WARN_XML, "warn-xml" }, { XOF_XPATH, "xpath" }, { 0, NULL } }; /* Options available via the environment variable ($LIBXO_OPTIONS) */ static xo_mapping_t xo_xof_simple_names[] = { { XOF_COLOR_ALLOWED, "color" }, { XOF_FLUSH, "flush" }, { XOF_FLUSH_LINE, "flush-line" }, { XOF_NO_HUMANIZE, "no-humanize" }, { XOF_NO_LOCALE, "no-locale" }, { XOF_RETAIN_NONE, "no-retain" }, { XOF_PRETTY, "pretty" }, { XOF_RETAIN_ALL, "retain" }, { XOF_UNDERSCORES, "underscores" }, { XOF_WARN, "warn" }, { 0, NULL } }; /* * Convert string name to XOF_* flag value. * Not all are useful. Or safe. Or sane. */ static unsigned xo_name_to_flag (const char *name) { return (unsigned) xo_name_lookup(xo_xof_names, name, -1); } /** * Set the style of an libxo handle based on a string name * * @param xop XO handle * @param name String value of name * @return 0 on success, non-zero on failure */ int xo_set_style_name (xo_handle_t *xop, const char *name) { if (name == NULL) return -1; int style = xo_name_to_style(name); if (style < 0) return -1; xo_set_style(xop, style); return 0; } /* * Fill in the color map, based on the input string; currently unimplemented * Look for something like "colors=red/blue+green/yellow" as fg/bg pairs. */ static void xo_set_color_map (xo_handle_t *xop, char *value) { if (xo_text_only()) return; char *cp, *ep, *vp, *np; ssize_t len = value ? strlen(value) + 1 : 0; int num = 1, fg, bg; for (cp = value, ep = cp + len - 1; cp && *cp && cp < ep; cp = np) { np = strchr(cp, '+'); if (np) *np++ = '\0'; vp = strchr(cp, '/'); if (vp) *vp++ = '\0'; fg = *cp ? xo_color_find(cp) : -1; bg = (vp && *vp) ? xo_color_find(vp) : -1; #ifndef LIBXO_TEXT_ONLY xop->xo_color_map_fg[num] = (fg < 0) ? num : fg; xop->xo_color_map_bg[num] = (bg < 0) ? 
num : bg; #endif /* LIBXO_TEXT_ONLY */ if (++num > XO_NUM_COLORS) break; } /* If no color initialization happened, then we don't need the map */ if (num > 1) XOF_SET(xop, XOF_COLOR_MAP); else XOF_CLEAR(xop, XOF_COLOR_MAP); #ifndef LIBXO_TEXT_ONLY /* Fill in the rest of the colors with the defaults */ for ( ; num < XO_NUM_COLORS; num++) xop->xo_color_map_fg[num] = xop->xo_color_map_bg[num] = num; #endif /* LIBXO_TEXT_ONLY */ } static int xo_set_options_simple (xo_handle_t *xop, const char *input) { xo_xof_flags_t new_flag; char *cp, *ep, *vp, *np, *bp; ssize_t len = strlen(input) + 1; bp = alloca(len); memcpy(bp, input, len); for (cp = bp, ep = cp + len - 1; cp && cp < ep; cp = np) { np = strchr(cp, ','); if (np) *np++ = '\0'; vp = strchr(cp, '='); if (vp) *vp++ = '\0'; if (xo_streq("colors", cp)) { xo_set_color_map(xop, vp); continue; } new_flag = xo_name_lookup(xo_xof_simple_names, cp, -1); if (new_flag != 0) { XOF_SET(xop, new_flag); } else if (xo_streq(cp, "no-color")) { XOF_CLEAR(xop, XOF_COLOR_ALLOWED); } else { xo_failure(xop, "unknown simple option: %s", cp); return -1; } } return 0; } /** * Set the options for a handle using a string of options * passed in. The input is a comma-separated set of names * and optional values: "xml,pretty,indent=4" * * @param xop XO handle * @param input Comma-separated set of option values * @return 0 on success, non-zero on failure */ int xo_set_options (xo_handle_t *xop, const char *input) { char *cp, *ep, *vp, *np, *bp; int style = -1, new_style, rc = 0; ssize_t len; xo_xof_flags_t new_flag; if (input == NULL) return 0; xop = xo_default(xop); #ifdef LIBXO_COLOR_ON_BY_DEFAULT /* If the installer used --enable-color-on-by-default, then we allow it */ XOF_SET(xop, XOF_COLOR_ALLOWED); #endif /* LIBXO_COLOR_ON_BY_DEFAULT */ /* * We support a simpler, old-school style of giving option * also, using a single character for each option. It's * ideal for lazy people, such as myself. */ if (*input == ':') { ssize_t sz; for (input++ ; *input; input++) { switch (*input) { case 'c': XOF_SET(xop, XOF_COLOR_ALLOWED); break; case 'f': XOF_SET(xop, XOF_FLUSH); break; case 'F': XOF_SET(xop, XOF_FLUSH_LINE); break; case 'g': XOF_SET(xop, XOF_LOG_GETTEXT); break; case 'H': xop->xo_style = XO_STYLE_HTML; break; case 'I': XOF_SET(xop, XOF_INFO); break; case 'i': sz = strspn(input + 1, "0123456789"); if (sz > 0) { xop->xo_indent_by = atoi(input + 1); input += sz - 1; /* Skip value */ } break; case 'J': xop->xo_style = XO_STYLE_JSON; break; case 'k': XOF_SET(xop, XOF_KEYS); break; case 'n': XOF_SET(xop, XOF_NO_HUMANIZE); break; case 'P': XOF_SET(xop, XOF_PRETTY); break; case 'T': xop->xo_style = XO_STYLE_TEXT; break; case 'U': XOF_SET(xop, XOF_UNITS); break; case 'u': XOF_SET(xop, XOF_UNDERSCORES); break; case 'W': XOF_SET(xop, XOF_WARN); break; case 'X': xop->xo_style = XO_STYLE_XML; break; case 'x': XOF_SET(xop, XOF_XPATH); break; } } return 0; } len = strlen(input) + 1; bp = alloca(len); memcpy(bp, input, len); for (cp = bp, ep = cp + len - 1; cp && cp < ep; cp = np) { np = strchr(cp, ','); if (np) *np++ = '\0'; + /* + * "@foo" is a shorthand for "encoder=foo". This is driven + * chiefly by a desire to make pluggable encoders not appear + * so distinct from built-in encoders. 
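+ *
+ * For example, "--libxo @csv" is treated exactly like
+ * "--libxo encoder=csv".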
+ */ + if (*cp == '@') { + vp = cp + 1; + + if (*vp == '\0') + xo_failure(xop, "missing value for encoder option"); + else { + rc = xo_encoder_init(xop, vp); + if (rc) + xo_warnx("error initializing encoder: %s", vp); + } + + continue; + } + vp = strchr(cp, '='); if (vp) *vp++ = '\0'; if (xo_streq("colors", cp)) { xo_set_color_map(xop, vp); continue; } /* * For options, we don't allow "encoder" since we want to * handle it explicitly below as "encoder=xxx". */ new_style = xo_name_to_style(cp); if (new_style >= 0 && new_style != XO_STYLE_ENCODER) { if (style >= 0) xo_warnx("ignoring multiple styles: '%s'", cp); else style = new_style; } else { new_flag = xo_name_to_flag(cp); if (new_flag != 0) XOF_SET(xop, new_flag); else if (xo_streq(cp, "no-color")) XOF_CLEAR(xop, XOF_COLOR_ALLOWED); else if (xo_streq(cp, "indent")) { if (vp) xop->xo_indent_by = atoi(vp); else xo_failure(xop, "missing value for indent option"); } else if (xo_streq(cp, "encoder")) { if (vp == NULL) xo_failure(xop, "missing value for encoder option"); else { rc = xo_encoder_init(xop, vp); if (rc) xo_warnx("error initializing encoder: %s", vp); } } else { xo_warnx("unknown libxo option value: '%s'", cp); rc = -1; } } } if (style > 0) xop->xo_style= style; return rc; } /** * Set one or more flags for a given handle (or default if handle is NULL). * These flags will affect future output. * * @param xop XO handle to alter (or NULL for default handle) * @param flags Flags to be set (XOF_*) */ void xo_set_flags (xo_handle_t *xop, xo_xof_flags_t flags) { xop = xo_default(xop); XOF_SET(xop, flags); } /** * Accessor to return the current set of flags for a handle * @param xop XO handle * @return Current set of flags */ xo_xof_flags_t xo_get_flags (xo_handle_t *xop) { xop = xo_default(xop); return xop->xo_flags; } /** * strndup with a twist: len < 0 means len = strlen(str) */ static char * xo_strndup (const char *str, ssize_t len) { if (len < 0) len = strlen(str); char *cp = xo_realloc(NULL, len + 1); if (cp) { memcpy(cp, str, len); cp[len] = '\0'; } return cp; } /** * Record a leading prefix for the XPath we generate. This allows the * generated data to be placed within an XML hierarchy but still have * accurate XPath expressions. * * @param xop XO handle to alter (or NULL for default handle) * @param path The XPath expression */ void xo_set_leading_xpath (xo_handle_t *xop, const char *path) { xop = xo_default(xop); if (xop->xo_leading_xpath) { xo_free(xop->xo_leading_xpath); xop->xo_leading_xpath = NULL; } if (path == NULL) return; xop->xo_leading_xpath = xo_strndup(path, -1); } /** * Record the info data for a set of tags * * @param xop XO handle to alter (or NULL for default handle) * @param info Info data (xo_info_t) to be recorded (or NULL) (MUST BE SORTED) * @pararm count Number of entries in info (or -1 to count them ourselves) */ void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count) { xop = xo_default(xop); if (count < 0 && infop) { xo_info_t *xip; for (xip = infop, count = 0; xip->xi_name; xip++, count++) continue; } xop->xo_info = infop; xop->xo_info_count = count; } /** * Set the formatter callback for a handle. The callback should * return a newly formatting contents of a formatting instruction, * meaning the bits inside the braces. */ void xo_set_formatter (xo_handle_t *xop, xo_formatter_t func, xo_checkpointer_t cfunc) { xop = xo_default(xop); xop->xo_formatter = func; xop->xo_checkpointer = cfunc; } /** * Clear one or more flags for a given handle (or default if handle is NULL). 
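 * (For example, xo_clear_flags(NULL, XOF_PRETTY) switches
 * pretty-printing back off on the default handle.)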
* These flags will affect future output. * * @param xop XO handle to alter (or NULL for default handle) * @param flags Flags to be cleared (XOF_*) */ void xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags) { xop = xo_default(xop); XOF_CLEAR(xop, flags); } static const char * xo_state_name (xo_state_t state) { static const char *names[] = { "init", "open_container", "close_container", "open_list", "close_list", "open_instance", "close_instance", "open_leaf_list", "close_leaf_list", "discarding", "marker", "emit", "emit_leaf_list", "finish", NULL }; if (state < (sizeof(names) / sizeof(names[0]))) return names[state]; return "unknown"; } static void xo_line_ensure_open (xo_handle_t *xop, xo_xff_flags_t flags UNUSED) { static char div_open[] = "
"; static char div_open_blank[] = "
"; if (XOF_ISSET(xop, XOF_CONTINUATION)) { XOF_CLEAR(xop, XOF_CONTINUATION); XOIF_SET(xop, XOIF_DIV_OPEN); return; } if (XOIF_ISSET(xop, XOIF_DIV_OPEN)) return; if (xo_style(xop) != XO_STYLE_HTML) return; XOIF_SET(xop, XOIF_DIV_OPEN); if (flags & XFF_BLANK_LINE) xo_data_append(xop, div_open_blank, sizeof(div_open_blank) - 1); else xo_data_append(xop, div_open, sizeof(div_open) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } static void xo_line_close (xo_handle_t *xop) { static char div_close[] = "
"; switch (xo_style(xop)) { case XO_STYLE_HTML: if (!XOIF_ISSET(xop, XOIF_DIV_OPEN)) xo_line_ensure_open(xop, 0); XOIF_CLEAR(xop, XOIF_DIV_OPEN); xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); break; case XO_STYLE_TEXT: xo_data_append(xop, "\n", 1); break; } } static int xo_info_compare (const void *key, const void *data) { const char *name = key; const xo_info_t *xip = data; return strcmp(name, xip->xi_name); } static xo_info_t * xo_info_find (xo_handle_t *xop, const char *name, ssize_t nlen) { xo_info_t *xip; char *cp = alloca(nlen + 1); /* Need local copy for NUL termination */ memcpy(cp, name, nlen); cp[nlen] = '\0'; xip = bsearch(cp, xop->xo_info, xop->xo_info_count, sizeof(xop->xo_info[0]), xo_info_compare); return xip; } #define CONVERT(_have, _need) (((_have) << 8) | (_need)) /* * Check to see that the conversion is safe and sane. */ static int xo_check_conversion (xo_handle_t *xop, int have_enc, int need_enc) { switch (CONVERT(have_enc, need_enc)) { case CONVERT(XF_ENC_UTF8, XF_ENC_UTF8): case CONVERT(XF_ENC_UTF8, XF_ENC_LOCALE): case CONVERT(XF_ENC_WIDE, XF_ENC_UTF8): case CONVERT(XF_ENC_WIDE, XF_ENC_LOCALE): case CONVERT(XF_ENC_LOCALE, XF_ENC_LOCALE): case CONVERT(XF_ENC_LOCALE, XF_ENC_UTF8): return 0; default: xo_failure(xop, "invalid conversion (%c:%c)", have_enc, need_enc); return 1; } } static int xo_format_string_direct (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags, const wchar_t *wcp, const char *cp, ssize_t len, int max, int need_enc, int have_enc) { int cols = 0; wchar_t wc = 0; ssize_t ilen, olen; ssize_t width; int attr = XOF_BIT_ISSET(flags, XFF_ATTR); const char *sp; if (len > 0 && !xo_buf_has_room(xbp, len)) return 0; for (;;) { if (len == 0) break; if (cp) { if (*cp == '\0') break; if ((flags & XFF_UNESCAPE) && (*cp == '\\' || *cp == '%')) { cp += 1; len -= 1; if (len == 0 || *cp == '\0') break; } } if (wcp && *wcp == L'\0') break; ilen = 0; switch (have_enc) { case XF_ENC_WIDE: /* Wide character */ wc = *wcp++; ilen = 1; break; case XF_ENC_UTF8: /* UTF-8 */ ilen = xo_utf8_to_wc_len(cp); if (ilen < 0) { xo_failure(xop, "invalid UTF-8 character: %02hhx", *cp); return -1; /* Can't continue; we can't find the end */ } if (len > 0 && len < ilen) { len = 0; /* Break out of the loop */ continue; } wc = xo_utf8_char(cp, ilen); if (wc == (wchar_t) -1) { xo_failure(xop, "invalid UTF-8 character: %02hhx/%d", *cp, ilen); return -1; /* Can't continue; we can't find the end */ } cp += ilen; break; case XF_ENC_LOCALE: /* Native locale */ ilen = (len > 0) ? len : MB_LEN_MAX; ilen = mbrtowc(&wc, cp, ilen, &xop->xo_mbstate); if (ilen < 0) { /* Invalid data; skip */ xo_failure(xop, "invalid mbs char: %02hhx", *cp); wc = L'?'; ilen = 1; } if (ilen == 0) { /* Hit a wide NUL character */ len = 0; continue; } cp += ilen; break; } /* Reduce len, but not below zero */ if (len > 0) { len -= ilen; if (len < 0) len = 0; } /* * Find the width-in-columns of this character, which must be done * in wide characters, since we lack a mbswidth() function. If * it doesn't fit */ width = xo_wcwidth(wc); if (width < 0) width = iswcntrl(wc) ? 
0 : 1; if (xo_style(xop) == XO_STYLE_TEXT || xo_style(xop) == XO_STYLE_HTML) { if (max > 0 && cols + width > max) break; } switch (need_enc) { case XF_ENC_UTF8: /* Output in UTF-8 needs to be escaped, based on the style */ switch (xo_style(xop)) { case XO_STYLE_XML: case XO_STYLE_HTML: if (wc == '<') sp = xo_xml_lt; else if (wc == '>') sp = xo_xml_gt; else if (wc == '&') sp = xo_xml_amp; else if (attr && wc == '"') sp = xo_xml_quot; else break; ssize_t slen = strlen(sp); if (!xo_buf_has_room(xbp, slen - 1)) return -1; memcpy(xbp->xb_curp, sp, slen); xbp->xb_curp += slen; goto done_with_encoding; /* Need multi-level 'break' */ case XO_STYLE_JSON: if (wc != '\\' && wc != '"' && wc != '\n' && wc != '\r') break; if (!xo_buf_has_room(xbp, 2)) return -1; *xbp->xb_curp++ = '\\'; if (wc == '\n') wc = 'n'; else if (wc == '\r') wc = 'r'; else wc = wc & 0x7f; *xbp->xb_curp++ = wc; goto done_with_encoding; case XO_STYLE_SDPARAMS: if (wc != '\\' && wc != '"' && wc != ']') break; if (!xo_buf_has_room(xbp, 2)) return -1; *xbp->xb_curp++ = '\\'; wc = wc & 0x7f; *xbp->xb_curp++ = wc; goto done_with_encoding; } olen = xo_utf8_emit_len(wc); if (olen < 0) { xo_failure(xop, "ignoring bad length"); continue; } if (!xo_buf_has_room(xbp, olen)) return -1; xo_utf8_emit_char(xbp->xb_curp, olen, wc); xbp->xb_curp += olen; break; case XF_ENC_LOCALE: if (!xo_buf_has_room(xbp, MB_LEN_MAX + 1)) return -1; olen = wcrtomb(xbp->xb_curp, wc, &xop->xo_mbstate); if (olen <= 0) { xo_failure(xop, "could not convert wide char: %lx", (unsigned long) wc); width = 1; *xbp->xb_curp++ = '?'; } else xbp->xb_curp += olen; break; } done_with_encoding: cols += width; } return cols; } static int xo_needed_encoding (xo_handle_t *xop) { if (XOF_ISSET(xop, XOF_UTF8)) /* Check the override flag */ return XF_ENC_UTF8; if (xo_style(xop) == XO_STYLE_TEXT) /* Text means locale */ return XF_ENC_LOCALE; return XF_ENC_UTF8; /* Otherwise, we love UTF-8 */ } static ssize_t xo_format_string (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags, xo_format_t *xfp) { static char null[] = "(null)"; static char null_no_quotes[] = "null"; char *cp = NULL; wchar_t *wcp = NULL; ssize_t len; ssize_t cols = 0, rc = 0; ssize_t off = xbp->xb_curp - xbp->xb_bufp, off2; int need_enc = xo_needed_encoding(xop); if (xo_check_conversion(xop, xfp->xf_enc, need_enc)) return 0; len = xfp->xf_width[XF_WIDTH_SIZE]; if (xfp->xf_fc == 'm') { cp = strerror(xop->xo_errno); if (len < 0) len = cp ? strlen(cp) : 0; goto normal_string; } else if (xfp->xf_enc == XF_ENC_WIDE) { wcp = va_arg(xop->xo_vap, wchar_t *); if (xfp->xf_skip) return 0; /* * Dont' deref NULL; use the traditional "(null)" instead * of the more accurate "who's been a naughty boy, then?". */ if (wcp == NULL) { cp = null; len = sizeof(null) - 1; } } else { cp = va_arg(xop->xo_vap, char *); /* UTF-8 or native */ normal_string: if (xfp->xf_skip) return 0; /* Echo "Dont' deref NULL" logic */ if (cp == NULL) { if ((flags & XFF_NOQUOTE) && xo_style_is_encoding(xop)) { cp = null_no_quotes; len = sizeof(null_no_quotes) - 1; } else { cp = null; len = sizeof(null) - 1; } } /* * Optimize the most common case, which is "%s". We just * need to copy the complete string to the output buffer. 
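* For example, xo_emit("{:interface/%s}", name) takes this shortcut, while "{:interface/%-10s}" cannot, since its width triggers the padding logic below.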
*/ if (xfp->xf_enc == need_enc && xfp->xf_width[XF_WIDTH_MIN] < 0 && xfp->xf_width[XF_WIDTH_SIZE] < 0 && xfp->xf_width[XF_WIDTH_MAX] < 0 && !(XOIF_ISSET(xop, XOIF_ANCHOR) || XOF_ISSET(xop, XOF_COLUMNS))) { len = strlen(cp); xo_buf_escape(xop, xbp, cp, len, flags); /* * Our caller expects xb_curp left untouched, so we have * to reset it and return the number of bytes written to * the buffer. */ off2 = xbp->xb_curp - xbp->xb_bufp; rc = off2 - off; xbp->xb_curp = xbp->xb_bufp + off; return rc; } } cols = xo_format_string_direct(xop, xbp, flags, wcp, cp, len, xfp->xf_width[XF_WIDTH_MAX], need_enc, xfp->xf_enc); if (cols < 0) goto bail; /* * xo_buf_append* will move xb_curp, so we save/restore it. */ off2 = xbp->xb_curp - xbp->xb_bufp; rc = off2 - off; xbp->xb_curp = xbp->xb_bufp + off; if (cols < xfp->xf_width[XF_WIDTH_MIN]) { /* * Find the number of columns needed to display the string. * If we have the original wide string, we just call wcswidth, * but if we did the work ourselves, then we need to do it. */ int delta = xfp->xf_width[XF_WIDTH_MIN] - cols; if (!xo_buf_has_room(xbp, xfp->xf_width[XF_WIDTH_MIN])) goto bail; /* * If seen_minus, then pad on the right; otherwise move it so * we can pad on the left. */ if (xfp->xf_seen_minus) { cp = xbp->xb_curp + rc; } else { cp = xbp->xb_curp; memmove(xbp->xb_curp + delta, xbp->xb_curp, rc); } /* Set the padding */ memset(cp, (xfp->xf_leading_zero > 0) ? '0' : ' ', delta); rc += delta; cols += delta; } if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; return rc; bail: xbp->xb_curp = xbp->xb_bufp + off; return 0; } /* * Look backwards in a buffer to find a numeric value */ static int xo_buf_find_last_number (xo_buffer_t *xbp, ssize_t start_offset) { int rc = 0; /* Fail with zero */ int digit = 1; char *sp = xbp->xb_bufp; char *cp = sp + start_offset; while (--cp >= sp) if (isdigit((int) *cp)) break; for ( ; cp >= sp; cp--) { if (!isdigit((int) *cp)) break; rc += (*cp - '0') * digit; digit *= 10; } return rc; } static ssize_t xo_count_utf8_cols (const char *str, ssize_t len) { ssize_t tlen; wchar_t wc; ssize_t cols = 0; const char *ep = str + len; while (str < ep) { tlen = xo_utf8_to_wc_len(str); if (tlen < 0) /* Broken input is very bad */ return cols; wc = xo_utf8_char(str, tlen); if (wc == (wchar_t) -1) return cols; /* We only print printable characters */ if (iswprint((wint_t) wc)) { /* * Find the width-in-columns of this character, which must be done * in wide characters, since we lack a mbswidth() function. */ ssize_t width = xo_wcwidth(wc); if (width < 0) width = iswcntrl(wc) ? 0 : 1; cols += width; } str += tlen; } return cols; } #ifdef HAVE_GETTEXT static inline const char * xo_dgettext (xo_handle_t *xop, const char *str) { const char *domainname = xop->xo_gt_domain; const char *res; res = dgettext(domainname, str); if (XOF_ISSET(xop, XOF_LOG_GETTEXT)) fprintf(stderr, "xo: gettext: %s%s%smsgid \"%s\" returns \"%s\"\n", domainname ? "domain \"" : "", xo_printable(domainname), domainname ? "\", " : "", xo_printable(str), xo_printable(res)); return res; } static inline const char * xo_dngettext (xo_handle_t *xop, const char *sing, const char *plural, unsigned long int n) { const char *domainname = xop->xo_gt_domain; const char *res; res = dngettext(domainname, sing, plural, n); if (XOF_ISSET(xop, XOF_LOG_GETTEXT)) fprintf(stderr, "xo: gettext: %s%s%s" "msgid \"%s\", msgid_plural \"%s\" (%lu) returns \"%s\"\n", domainname ? 
"domain \"" : "", xo_printable(domainname), domainname ? "\", " : "", xo_printable(sing), xo_printable(plural), n, xo_printable(res)); return res; } #else /* HAVE_GETTEXT */ static inline const char * xo_dgettext (xo_handle_t *xop UNUSED, const char *str) { return str; } static inline const char * xo_dngettext (xo_handle_t *xop UNUSED, const char *singular, const char *plural, unsigned long int n) { return (n == 1) ? singular : plural; } #endif /* HAVE_GETTEXT */ /* * This is really _re_formatting, since the normal format code has * generated a beautiful string into xo_data, starting at * start_offset. We need to see if it's plural, which means * comma-separated options, or singular. Then we make the appropriate * call to d[n]gettext() to get the locale-based version. Note that * both input and output of gettext() this should be UTF-8. */ static ssize_t xo_format_gettext (xo_handle_t *xop, xo_xff_flags_t flags, ssize_t start_offset, ssize_t cols, int need_enc) { xo_buffer_t *xbp = &xop->xo_data; if (!xo_buf_has_room(xbp, 1)) return cols; xbp->xb_curp[0] = '\0'; /* NUL-terminate the input string */ char *cp = xbp->xb_bufp + start_offset; ssize_t len = xbp->xb_curp - cp; const char *newstr = NULL; /* * The plural flag asks us to look backwards at the last numeric * value rendered and disect the string into two pieces. */ if (flags & XFF_GT_PLURAL) { int n = xo_buf_find_last_number(xbp, start_offset); char *two = memchr(cp, (int) ',', len); if (two == NULL) { xo_failure(xop, "no comma in plural gettext field: '%s'", cp); return cols; } if (two == cp) { xo_failure(xop, "nothing before comma in plural gettext " "field: '%s'", cp); return cols; } if (two == xbp->xb_curp) { xo_failure(xop, "nothing after comma in plural gettext " "field: '%s'", cp); return cols; } *two++ = '\0'; if (flags & XFF_GT_FIELD) { newstr = xo_dngettext(xop, cp, two, n); } else { /* Don't do a gettext() look up, just get the plural form */ newstr = (n == 1) ? cp : two; } /* * If we returned the first string, optimize a bit by * backing up over comma */ if (newstr == cp) { xbp->xb_curp = two - 1; /* One for comma */ /* * If the caller wanted UTF8, we're done; nothing changed, * but we need to count the columns used. */ if (need_enc == XF_ENC_UTF8) return xo_count_utf8_cols(cp, xbp->xb_curp - cp); } } else { /* The simple case (singular) */ newstr = xo_dgettext(xop, cp); if (newstr == cp) { /* If the caller wanted UTF8, we're done; nothing changed */ if (need_enc == XF_ENC_UTF8) return cols; } } /* * Since the new string string might be in gettext's buffer or * in the buffer (as the plural form), we make a copy. */ ssize_t nlen = strlen(newstr); char *newcopy = alloca(nlen + 1); memcpy(newcopy, newstr, nlen + 1); xbp->xb_curp = xbp->xb_bufp + start_offset; /* Reset the buffer */ return xo_format_string_direct(xop, xbp, flags, NULL, newcopy, nlen, 0, need_enc, XF_ENC_UTF8); } static void xo_data_append_content (xo_handle_t *xop, const char *str, ssize_t len, xo_xff_flags_t flags) { int cols; int need_enc = xo_needed_encoding(xop); ssize_t start_offset = xo_buf_offset(&xop->xo_data); cols = xo_format_string_direct(xop, &xop->xo_data, XFF_UNESCAPE | flags, NULL, str, len, -1, need_enc, XF_ENC_UTF8); if (flags & XFF_GT_FLAGS) cols = xo_format_gettext(xop, flags, start_offset, cols, need_enc); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; } /** * Bump one of the 'width' values in a format strings (e.g. "%40.50.60s"). 
* @param xfp Formatting instructions * @param digit Single digit (0-9) of input */ static void xo_bump_width (xo_format_t *xfp, int digit) { int *ip = &xfp->xf_width[xfp->xf_dots]; *ip = ((*ip > 0) ? *ip : 0) * 10 + digit; } static ssize_t xo_trim_ws (xo_buffer_t *xbp, ssize_t len) { char *cp, *sp, *ep; ssize_t delta; /* First trim leading space */ for (cp = sp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { if (*cp != ' ') break; } delta = cp - sp; if (delta) { len -= delta; memmove(sp, cp, len); } /* Then trim off the end */ for (cp = xbp->xb_curp, sp = ep = cp + len; cp < ep; ep--) { if (ep[-1] != ' ') break; } delta = sp - ep; if (delta) { len -= delta; cp[len] = '\0'; } return len; } /* * Interface to format a single field. The arguments are in xo_vap, * and the format is in 'fmt'. If 'xbp' is null, we use xop->xo_data; * this is the most common case. */ static ssize_t xo_do_format_field (xo_handle_t *xop, xo_buffer_t *xbp, const char *fmt, ssize_t flen, xo_xff_flags_t flags) { xo_format_t xf; const char *cp, *ep, *sp, *xp = NULL; ssize_t rc, cols; int style = (flags & XFF_XML) ? XO_STYLE_XML : xo_style(xop); unsigned make_output = !(flags & XFF_NO_OUTPUT) ? 1 : 0; int need_enc = xo_needed_encoding(xop); int real_need_enc = need_enc; ssize_t old_cols = xop->xo_columns; /* The gettext interface is UTF-8, so we'll need that for now */ if (flags & XFF_GT_FIELD) need_enc = XF_ENC_UTF8; if (xbp == NULL) xbp = &xop->xo_data; ssize_t start_offset = xo_buf_offset(xbp); for (cp = fmt, ep = fmt + flen; cp < ep; cp++) { /* * Since we're starting a new field, save the starting offset. * We'll need this later for field-related operations. */ if (*cp != '%') { add_one: if (xp == NULL) xp = cp; if (*cp == '\\' && cp[1] != '\0') cp += 1; continue; } else if (cp + 1 < ep && cp[1] == '%') { cp += 1; goto add_one; } if (xp) { if (make_output) { cols = xo_format_string_direct(xop, xbp, flags | XFF_UNESCAPE, NULL, xp, cp - xp, -1, need_enc, XF_ENC_UTF8); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; } xp = NULL; } bzero(&xf, sizeof(xf)); xf.xf_leading_zero = -1; xf.xf_width[0] = xf.xf_width[1] = xf.xf_width[2] = -1; /* * "%@" starts an XO-specific set of flags: * @X@ - XML-only field; ignored if style isn't XML */ if (cp[1] == '@') { for (cp += 2; cp < ep; cp++) { if (*cp == '@') { break; } if (*cp == '*') { /* * '*' means there's a "%*.*s" value in vap that * we want to ignore */ if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) va_arg(xop->xo_vap, int); } } } /* Hidden fields are only visible to JSON and XML */ if (XOF_ISSET(xop, XFF_ENCODE_ONLY)) { if (style != XO_STYLE_XML && !xo_style_is_encoding(xop)) xf.xf_skip = 1; } else if (XOF_ISSET(xop, XFF_DISPLAY_ONLY)) { if (style != XO_STYLE_TEXT && xo_style(xop) != XO_STYLE_HTML) xf.xf_skip = 1; } if (!make_output) xf.xf_skip = 1; /* * Looking at one piece of a format; find the end and * call snprintf. Then advance xo_vap on our own. * * Note that 'n', 'v', and '$' are not supported. 
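* ('n' in particular is a classic security hazard, and positional '$' arguments would not mix with the sequential va_arg stepping done below.)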
*/ sp = cp; /* Save start pointer */ for (cp += 1; cp < ep; cp++) { if (*cp == 'l') xf.xf_lflag += 1; else if (*cp == 'h') xf.xf_hflag += 1; else if (*cp == 'j') xf.xf_jflag += 1; else if (*cp == 't') xf.xf_tflag += 1; else if (*cp == 'z') xf.xf_zflag += 1; else if (*cp == 'q') xf.xf_qflag += 1; else if (*cp == '.') { if (++xf.xf_dots >= XF_WIDTH_NUM) { xo_failure(xop, "Too many dots in format: '%s'", fmt); return -1; } } else if (*cp == '-') xf.xf_seen_minus = 1; else if (isdigit((int) *cp)) { if (xf.xf_leading_zero < 0) xf.xf_leading_zero = (*cp == '0'); xo_bump_width(&xf, *cp - '0'); } else if (*cp == '*') { xf.xf_stars += 1; xf.xf_star[xf.xf_dots] = 1; } else if (strchr("diouxXDOUeEfFgGaAcCsSpm", *cp) != NULL) break; else if (*cp == 'n' || *cp == 'v') { xo_failure(xop, "unsupported format: '%s'", fmt); return -1; } } if (cp == ep) xo_failure(xop, "field format missing format character: %s", fmt); xf.xf_fc = *cp; if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) { if (*cp == 's' || *cp == 'S') { /* Handle "%*.*.*s" */ int s; for (s = 0; s < XF_WIDTH_NUM; s++) { if (xf.xf_star[s]) { xf.xf_width[s] = va_arg(xop->xo_vap, int); /* Normalize a negative width value */ if (xf.xf_width[s] < 0) { if (s == 0) { xf.xf_width[0] = -xf.xf_width[0]; xf.xf_seen_minus = 1; } else xf.xf_width[s] = -1; /* Ignore negative values */ } } } } } /* If no max is given, it defaults to size */ if (xf.xf_width[XF_WIDTH_MAX] < 0 && xf.xf_width[XF_WIDTH_SIZE] >= 0) xf.xf_width[XF_WIDTH_MAX] = xf.xf_width[XF_WIDTH_SIZE]; if (xf.xf_fc == 'D' || xf.xf_fc == 'O' || xf.xf_fc == 'U') xf.xf_lflag = 1; if (!xf.xf_skip) { xo_buffer_t *fbp = &xop->xo_fmt; ssize_t len = cp - sp + 1; if (!xo_buf_has_room(fbp, len + 1)) return -1; char *newfmt = fbp->xb_curp; memcpy(newfmt, sp, len); newfmt[0] = '%'; /* If we skipped over a "%@...@s" format */ newfmt[len] = '\0'; /* * Bad news: our strings are UTF-8, but the stock printf * functions won't handle field widths for wide characters * correctly. So we have to handle this ourselves. */ if (xop->xo_formatter == NULL && (xf.xf_fc == 's' || xf.xf_fc == 'S' || xf.xf_fc == 'm')) { xf.xf_enc = (xf.xf_fc == 'm') ? XF_ENC_UTF8 : (xf.xf_lflag || (xf.xf_fc == 'S')) ? XF_ENC_WIDE : xf.xf_hflag ? XF_ENC_LOCALE : XF_ENC_UTF8; rc = xo_format_string(xop, xbp, flags, &xf); if ((flags & XFF_TRIM_WS) && xo_style_is_encoding(xop)) rc = xo_trim_ws(xbp, rc); } else { ssize_t columns = rc = xo_vsnprintf(xop, xbp, newfmt, xop->xo_vap); if (rc > 0) { /* * For XML and HTML, we need "&<>" processing; for JSON, * it's quotes. Text gets nothing. */ switch (style) { case XO_STYLE_XML: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); /* FALLTHRU */ case XO_STYLE_HTML: rc = xo_escape_xml(xbp, rc, (flags & XFF_ATTR)); break; case XO_STYLE_JSON: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); rc = xo_escape_json(xbp, rc, 0); break; case XO_STYLE_SDPARAMS: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); rc = xo_escape_sdparams(xbp, rc, 0); break; case XO_STYLE_ENCODER: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); break; } /* * We can assume all the non-%s data we've * added is ASCII, so the columns and bytes are the * same. xo_format_string handles all the fancy * string conversions and updates xo_anchor_columns * accordingly. 
*/ if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += columns; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += columns; } } if (rc > 0) xbp->xb_curp += rc; } /* * Now for the tricky part: we need to move the argument pointer * along by the amount needed. */ if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) { if (xf.xf_fc == 's' ||xf.xf_fc == 'S') { /* * The 'S' and 's' formats are normally handled in * xo_format_string, but if we skipped it, then we * need to pop it. */ if (xf.xf_skip) va_arg(xop->xo_vap, char *); } else if (xf.xf_fc == 'm') { /* Nothing on the stack for "%m" */ } else { int s; for (s = 0; s < XF_WIDTH_NUM; s++) { if (xf.xf_star[s]) va_arg(xop->xo_vap, int); } if (strchr("diouxXDOU", xf.xf_fc) != NULL) { if (xf.xf_hflag > 1) { va_arg(xop->xo_vap, int); } else if (xf.xf_hflag > 0) { va_arg(xop->xo_vap, int); } else if (xf.xf_lflag > 1) { va_arg(xop->xo_vap, unsigned long long); } else if (xf.xf_lflag > 0) { va_arg(xop->xo_vap, unsigned long); } else if (xf.xf_jflag > 0) { va_arg(xop->xo_vap, intmax_t); } else if (xf.xf_tflag > 0) { va_arg(xop->xo_vap, ptrdiff_t); } else if (xf.xf_zflag > 0) { va_arg(xop->xo_vap, size_t); } else if (xf.xf_qflag > 0) { va_arg(xop->xo_vap, quad_t); } else { va_arg(xop->xo_vap, int); } } else if (strchr("eEfFgGaA", xf.xf_fc) != NULL) if (xf.xf_lflag) va_arg(xop->xo_vap, long double); else va_arg(xop->xo_vap, double); else if (xf.xf_fc == 'C' || (xf.xf_fc == 'c' && xf.xf_lflag)) va_arg(xop->xo_vap, wint_t); else if (xf.xf_fc == 'c') va_arg(xop->xo_vap, int); else if (xf.xf_fc == 'p') va_arg(xop->xo_vap, void *); } } } if (xp) { if (make_output) { cols = xo_format_string_direct(xop, xbp, flags | XFF_UNESCAPE, NULL, xp, cp - xp, -1, need_enc, XF_ENC_UTF8); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; } xp = NULL; } if (flags & XFF_GT_FLAGS) { /* * Handle gettext()ing the field by looking up the value * and then copying it in, while converting to locale, if * needed. */ ssize_t new_cols = xo_format_gettext(xop, flags, start_offset, old_cols, real_need_enc); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += new_cols - old_cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += new_cols - old_cols; } return 0; } /* * Remove any numeric precision/width format from the format string by * inserting the "%" after the [0-9]+, returning the substring. */ static char * xo_fix_encoding (xo_handle_t *xop UNUSED, char *encoding) { char *cp = encoding; if (cp[0] != '%' || !isdigit((int) cp[1])) return encoding; for (cp += 2; *cp; cp++) { if (!isdigit((int) *cp)) break; } *--cp = '%'; /* Back off and insert the '%' */ return cp; } static void xo_color_append_html (xo_handle_t *xop) { /* * If the color buffer has content, we add it now. It's already * prebuilt and ready, since we want to add it to every
<div>
. */ if (!xo_buf_is_empty(&xop->xo_color_buf)) { xo_buffer_t *xbp = &xop->xo_color_buf; xo_data_append(xop, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp); } } /* * A wrapper for humanize_number that autoscales, since the * HN_AUTOSCALE flag scales as needed based on the size of * the output buffer, not the size of the value. I also * wish HN_DECIMAL was more imperative, without the <10 * test. But the boat only goes where we want when we hold * the rudder, so xo_humanize fixes part of the problem. */ static ssize_t xo_humanize (char *buf, ssize_t len, uint64_t value, int flags) { int scale = 0; if (value) { uint64_t left = value; if (flags & HN_DIVISOR_1000) { for ( ; left; scale++) left /= 1000; } else { for ( ; left; scale++) left /= 1024; } scale -= 1; } return xo_humanize_number(buf, len, value, "", scale, flags); } /* * This is an area where we can save information from the handle for * later restoration. We need to know what data was rendered to know * what needs cleaned up. */ typedef struct xo_humanize_save_s { ssize_t xhs_offset; /* Saved xo_offset */ ssize_t xhs_columns; /* Saved xo_columns */ ssize_t xhs_anchor_columns; /* Saved xo_anchor_columns */ } xo_humanize_save_t; /* * Format a "humanized" value for a numeric, meaning something nice * like "44M" instead of "44470272". We autoscale, choosing the * most appropriate value for K/M/G/T/P/E based on the value given. */ static void xo_format_humanize (xo_handle_t *xop, xo_buffer_t *xbp, xo_humanize_save_t *savep, xo_xff_flags_t flags) { if (XOF_ISSET(xop, XOF_NO_HUMANIZE)) return; ssize_t end_offset = xbp->xb_curp - xbp->xb_bufp; if (end_offset == savep->xhs_offset) /* Huh? Nothing to render */ return; /* * We have a string that's allegedly a number. We want to * humanize it, which means turning it back into a number * and calling xo_humanize_number on it. */ uint64_t value; char *ep; xo_buf_append(xbp, "", 1); /* NUL-terminate it */ value = strtoull(xbp->xb_bufp + savep->xhs_offset, &ep, 0); if (!(value == ULLONG_MAX && errno == ERANGE) && (ep != xbp->xb_bufp + savep->xhs_offset)) { /* * There are few values where humanize_number needs * more bytes than the original value. I've used * 10 as a rectal number to cover those scenarios. */ if (xo_buf_has_room(xbp, 10)) { xbp->xb_curp = xbp->xb_bufp + savep->xhs_offset; ssize_t rc; ssize_t left = (xbp->xb_bufp + xbp->xb_size) - xbp->xb_curp; int hn_flags = HN_NOSPACE; /* On by default */ if (flags & XFF_HN_SPACE) hn_flags &= ~HN_NOSPACE; if (flags & XFF_HN_DECIMAL) hn_flags |= HN_DECIMAL; if (flags & XFF_HN_1000) hn_flags |= HN_DIVISOR_1000; rc = xo_humanize(xbp->xb_curp, left, value, hn_flags); if (rc > 0) { xbp->xb_curp += rc; xop->xo_columns = savep->xhs_columns + rc; xop->xo_anchor_columns = savep->xhs_anchor_columns + rc; } } } } /* * Convenience function that either append a fixed value (if one is * given) or formats a field using a format string. If it's * encode_only, then we can't skip formatting the field, since it may * be pulling arguments off the stack. */ static inline void xo_simple_field (xo_handle_t *xop, unsigned encode_only, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, xo_xff_flags_t flags) { if (encode_only) flags |= XFF_NO_OUTPUT; if (vlen == 0) xo_do_format_field(xop, NULL, fmt, flen, flags); else if (!encode_only) xo_data_append_content(xop, value, vlen, flags); } /* * Html mode: append a
<div> to the output buffer containing a field * along with all the supporting information indicated by the flags. */ static void xo_buf_append_div (xo_handle_t *xop, const char *class, xo_xff_flags_t flags, const char *name, ssize_t nlen, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, const char *encoding, ssize_t elen) { static char div_start[] = "
"; static char div_close[] = "
"; /* The encoding format defaults to the normal format */ if (encoding == NULL && fmt != NULL) { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; encoding = xo_fix_encoding(xop, enc); elen = strlen(encoding); } /* * To build our XPath predicate, we need to save the va_list before * we format our data, and then restore it before we format the * xpath expression. * Display-only keys implies that we've got an encode-only key * elsewhere, so we don't use them from making predicates. */ int need_predidate = (name && (flags & XFF_KEY) && !(flags & XFF_DISPLAY_ONLY) && XOF_ISSET(xop, XOF_XPATH)) ? 1 : 0; if (need_predidate) { va_list va_local; va_copy(va_local, xop->xo_vap); if (xop->xo_checkpointer) xop->xo_checkpointer(xop, xop->xo_vap, 0); /* * Build an XPath predicate expression to match this key. * We use the format buffer. */ xo_buffer_t *pbp = &xop->xo_predicate; pbp->xb_curp = pbp->xb_bufp; /* Restart buffer */ xo_buf_append(pbp, "[", 1); xo_buf_escape(xop, pbp, name, nlen, 0); if (XOF_ISSET(xop, XOF_PRETTY)) xo_buf_append(pbp, " = '", 4); else xo_buf_append(pbp, "='", 2); xo_xff_flags_t pflags = flags | XFF_XML | XFF_ATTR; pflags &= ~(XFF_NO_OUTPUT | XFF_ENCODE_ONLY); xo_do_format_field(xop, pbp, encoding, elen, pflags); xo_buf_append(pbp, "']", 2); /* Now we record this predicate expression in the stack */ xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; ssize_t olen = xsp->xs_keys ? strlen(xsp->xs_keys) : 0; ssize_t dlen = pbp->xb_curp - pbp->xb_bufp; char *cp = xo_realloc(xsp->xs_keys, olen + dlen + 1); if (cp) { memcpy(cp + olen, pbp->xb_bufp, dlen); cp[olen + dlen] = '\0'; xsp->xs_keys = cp; } /* Now we reset the xo_vap as if we were never here */ va_end(xop->xo_vap); va_copy(xop->xo_vap, va_local); va_end(va_local); if (xop->xo_checkpointer) xop->xo_checkpointer(xop, xop->xo_vap, 1); } if (flags & XFF_ENCODE_ONLY) { /* * Even if this is encode-only, we need to go through the * work of formatting it to make sure the args are cleared * from xo_vap. This is not true when vlen is zero, since * that means our "value" isn't on the stack. */ xo_simple_field(xop, TRUE, NULL, 0, encoding, elen, flags); return; } xo_line_ensure_open(xop, 0); if (XOF_ISSET(xop, XOF_PRETTY)) xo_buf_indent(xop, xop->xo_indent_by); xo_data_append(xop, div_start, sizeof(div_start) - 1); xo_data_append(xop, class, strlen(class)); /* * If the color buffer has content, we add it now. It's already * prebuilt and ready, since we want to add it to every
. */ if (!xo_buf_is_empty(&xop->xo_color_buf)) { xo_buffer_t *xbp = &xop->xo_color_buf; xo_data_append(xop, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp); } if (name) { xo_data_append(xop, div_tag, sizeof(div_tag) - 1); xo_data_escape(xop, name, nlen); /* * Save the offset at which we'd place units. See xo_format_units. */ if (XOF_ISSET(xop, XOF_UNITS)) { XOIF_SET(xop, XOIF_UNITS_PENDING); /* * Note: We need the '+1' here because we know we've not * added the closing quote. We add one, knowing the quote * will be added shortly. */ xop->xo_units_offset = xop->xo_data.xb_curp -xop->xo_data.xb_bufp + 1; } if (XOF_ISSET(xop, XOF_XPATH)) { int i; xo_stack_t *xsp; xo_data_append(xop, div_xpath, sizeof(div_xpath) - 1); if (xop->xo_leading_xpath) xo_data_append(xop, xop->xo_leading_xpath, strlen(xop->xo_leading_xpath)); for (i = 0; i <= xop->xo_depth; i++) { xsp = &xop->xo_stack[i]; if (xsp->xs_name == NULL) continue; /* * XSS_OPEN_LIST and XSS_OPEN_LEAF_LIST stack frames * are directly under XSS_OPEN_INSTANCE frames so we * don't need to put these in our XPath expressions. */ if (xsp->xs_state == XSS_OPEN_LIST || xsp->xs_state == XSS_OPEN_LEAF_LIST) continue; xo_data_append(xop, "/", 1); xo_data_escape(xop, xsp->xs_name, strlen(xsp->xs_name)); if (xsp->xs_keys) { /* Don't show keys for the key field */ if (i != xop->xo_depth || !(flags & XFF_KEY)) xo_data_append(xop, xsp->xs_keys, strlen(xsp->xs_keys)); } } xo_data_append(xop, "/", 1); xo_data_escape(xop, name, nlen); } if (XOF_ISSET(xop, XOF_INFO) && xop->xo_info) { static char in_type[] = "\" data-type=\""; static char in_help[] = "\" data-help=\""; xo_info_t *xip = xo_info_find(xop, name, nlen); if (xip) { if (xip->xi_type) { xo_data_append(xop, in_type, sizeof(in_type) - 1); xo_data_escape(xop, xip->xi_type, strlen(xip->xi_type)); } if (xip->xi_help) { xo_data_append(xop, in_help, sizeof(in_help) - 1); xo_data_escape(xop, xip->xi_help, strlen(xip->xi_help)); } } } if ((flags & XFF_KEY) && XOF_ISSET(xop, XOF_KEYS)) xo_data_append(xop, div_key, sizeof(div_key) - 1); } xo_buffer_t *xbp = &xop->xo_data; ssize_t base_offset = xbp->xb_curp - xbp->xb_bufp; xo_data_append(xop, div_end, sizeof(div_end) - 1); xo_humanize_save_t save; /* Save values for humanizing logic */ save.xhs_offset = xbp->xb_curp - xbp->xb_bufp; save.xhs_columns = xop->xo_columns; save.xhs_anchor_columns = xop->xo_anchor_columns; xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); if (flags & XFF_HUMANIZE) { /* * Unlike text style, we want to retain the original value and * stuff it into the "data-number" attribute. 
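* For example, a value of 44470272 is rendered as "44M" in the div body, while data-number="44470272" preserves the exact value for consumers of the HTML.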
*/ static const char div_number[] = "\" data-number=\""; ssize_t div_len = sizeof(div_number) - 1; ssize_t end_offset = xbp->xb_curp - xbp->xb_bufp; ssize_t olen = end_offset - save.xhs_offset; char *cp = alloca(olen + 1); memcpy(cp, xbp->xb_bufp + save.xhs_offset, olen); cp[olen] = '\0'; xo_format_humanize(xop, xbp, &save, flags); if (xo_buf_has_room(xbp, div_len + olen)) { ssize_t new_offset = xbp->xb_curp - xbp->xb_bufp; /* Move the humanized string off to the left */ memmove(xbp->xb_bufp + base_offset + div_len + olen, xbp->xb_bufp + base_offset, new_offset - base_offset); /* Copy the data_number attribute name */ memcpy(xbp->xb_bufp + base_offset, div_number, div_len); /* Copy the original long value */ memcpy(xbp->xb_bufp + base_offset + div_len, cp, olen); xbp->xb_curp += div_len + olen; } } xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } static void xo_format_text (xo_handle_t *xop, const char *str, ssize_t len) { switch (xo_style(xop)) { case XO_STYLE_TEXT: xo_buf_append_locale(xop, &xop->xo_data, str, len); break; case XO_STYLE_HTML: xo_buf_append_div(xop, "text", 0, NULL, 0, str, len, NULL, 0, NULL, 0); break; } } static void xo_format_title (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; xo_xff_flags_t flags = xfip->xfi_flags; static char div_open[] = "
"; static char div_close[] = "
"; if (flen == 0) { fmt = "%s"; flen = 2; } switch (xo_style(xop)) { case XO_STYLE_XML: case XO_STYLE_JSON: case XO_STYLE_SDPARAMS: case XO_STYLE_ENCODER: /* * Even though we don't care about text, we need to do * enough parsing work to skip over the right bits of xo_vap. */ xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); return; } xo_buffer_t *xbp = &xop->xo_data; ssize_t start = xbp->xb_curp - xbp->xb_bufp; ssize_t left = xbp->xb_size - start; ssize_t rc; if (xo_style(xop) == XO_STYLE_HTML) { xo_line_ensure_open(xop, 0); if (XOF_ISSET(xop, XOF_PRETTY)) xo_buf_indent(xop, xop->xo_indent_by); xo_buf_append(&xop->xo_data, div_open, sizeof(div_open) - 1); xo_color_append_html(xop); xo_buf_append(&xop->xo_data, div_middle, sizeof(div_middle) - 1); } start = xbp->xb_curp - xbp->xb_bufp; /* Reset start */ if (vlen) { char *newfmt = alloca(flen + 1); memcpy(newfmt, fmt, flen); newfmt[flen] = '\0'; /* If len is non-zero, the format string apply to the name */ char *newstr = alloca(vlen + 1); memcpy(newstr, value, vlen); newstr[vlen] = '\0'; if (newstr[vlen - 1] == 's') { char *bp; rc = snprintf(NULL, 0, newfmt, newstr); if (rc > 0) { /* * We have to do this the hard way, since we might need * the columns. */ bp = alloca(rc + 1); rc = snprintf(bp, rc + 1, newfmt, newstr); xo_data_append_content(xop, bp, rc, flags); } goto move_along; } else { rc = snprintf(xbp->xb_curp, left, newfmt, newstr); if (rc >= left) { if (!xo_buf_has_room(xbp, rc)) return; left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); rc = snprintf(xbp->xb_curp, left, newfmt, newstr); } if (rc > 0) { if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += rc; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += rc; } } } else { xo_do_format_field(xop, NULL, fmt, flen, flags); /* xo_do_format_field moved curp, so we need to reset it */ rc = xbp->xb_curp - (xbp->xb_bufp + start); xbp->xb_curp = xbp->xb_bufp + start; } /* If we're styling HTML, then we need to escape it */ if (xo_style(xop) == XO_STYLE_HTML) { rc = xo_escape_xml(xbp, rc, 0); } if (rc > 0) xbp->xb_curp += rc; move_along: if (xo_style(xop) == XO_STYLE_HTML) { xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } } /* * strspn() with a string length */ static ssize_t xo_strnspn (const char *str, size_t len, const char *accept) { ssize_t i; const char *cp, *ep; for (i = 0, cp = str, ep = str + len; cp < ep && *cp != '\0'; i++, cp++) { if (strchr(accept, *cp) == NULL) break; } return i; } /* * Decide if a format string should be considered "numeric", * in the sense that the number does not need to be quoted. * This means that it consists only of a single numeric field * with nothing exotic or "interesting". This means that * static values are never considered numeric. */ static int xo_format_is_numeric (const char *fmt, ssize_t flen) { if (flen <= 0 || *fmt++ != '%') /* Must start with '%' */ return FALSE; flen -= 1; /* Handle leading flags; don't want "#" since JSON can't handle hex */ ssize_t spn = xo_strnspn(fmt, flen, "0123456789.*+ -"); if (spn >= flen) return FALSE; fmt += spn; /* Move along the input string */ flen -= spn; /* Handle the length modifiers */ spn = xo_strnspn(fmt, flen, "hljtqz"); if (spn >= flen) return FALSE; fmt += spn; /* Move along the input string */ flen -= spn; if (flen != 1) /* Should only be one character left */ return FALSE; return (strchr("diouDOUeEfFgG", *fmt) == NULL) ? 
FALSE : TRUE; } /* * Update the stack flags using the object flags, allowing callers * to monkey with the stack flags without even knowing they exist. */ static void xo_stack_set_flags (xo_handle_t *xop) { if (XOF_ISSET(xop, XOF_NOT_FIRST)) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; xsp->xs_flags |= XSF_NOT_FIRST; XOF_CLEAR(xop, XOF_NOT_FIRST); } } static void xo_format_prep (xo_handle_t *xop, xo_xff_flags_t flags) { if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) { xo_data_append(xop, ",", 1); if (!(flags & XFF_LEAF_LIST) && XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; } #if 0 /* Useful debugging function */ void xo_arg (xo_handle_t *xop); void xo_arg (xo_handle_t *xop) { xop = xo_default(xop); fprintf(stderr, "0x%x", va_arg(xop->xo_vap, unsigned)); } #endif /* 0 */ static void xo_format_value (xo_handle_t *xop, const char *name, ssize_t nlen, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, const char *encoding, ssize_t elen, xo_xff_flags_t flags) { int pretty = XOF_ISSET(xop, XOF_PRETTY); int quote; /* * Before we emit a value, we need to know that the frame is ready. */ xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; if (flags & XFF_LEAF_LIST) { /* * Check if we've already started to emit normal leafs * or if we're not in a leaf list. */ if ((xsp->xs_flags & (XSF_EMIT | XSF_EMIT_KEY)) || !(xsp->xs_flags & XSF_EMIT_LEAF_LIST)) { char nbuf[nlen + 1]; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; ssize_t rc = xo_transition(xop, 0, nbuf, XSS_EMIT_LEAF_LIST); if (rc < 0) flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY; else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT_LEAF_LIST; } xsp = &xop->xo_stack[xop->xo_depth]; if (xsp->xs_name) { name = xsp->xs_name; nlen = strlen(name); } } else if (flags & XFF_KEY) { /* Emitting a 'k' (key) field */ if ((xsp->xs_flags & XSF_EMIT) && !(flags & XFF_DISPLAY_ONLY)) { xo_failure(xop, "key field emitted after normal value field: '%.*s'", nlen, name); } else if (!(xsp->xs_flags & XSF_EMIT_KEY)) { char nbuf[nlen + 1]; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; ssize_t rc = xo_transition(xop, 0, nbuf, XSS_EMIT); if (rc < 0) flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY; else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT_KEY; xsp = &xop->xo_stack[xop->xo_depth]; xsp->xs_flags |= XSF_EMIT_KEY; } } else { /* Emitting a normal value field */ if ((xsp->xs_flags & XSF_EMIT_LEAF_LIST) || !(xsp->xs_flags & XSF_EMIT)) { char nbuf[nlen + 1]; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; ssize_t rc = xo_transition(xop, 0, nbuf, XSS_EMIT); if (rc < 0) flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY; else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT; xsp = &xop->xo_stack[xop->xo_depth]; xsp->xs_flags |= XSF_EMIT; } } xo_buffer_t *xbp = &xop->xo_data; xo_humanize_save_t save; /* Save values for humanizing logic */ const char *leader = xo_xml_leader_len(xop, name, nlen); switch (xo_style(xop)) { case XO_STYLE_TEXT: if (flags & XFF_ENCODE_ONLY) flags |= XFF_NO_OUTPUT; save.xhs_offset = xbp->xb_curp - xbp->xb_bufp; save.xhs_columns = xop->xo_columns; save.xhs_anchor_columns = xop->xo_anchor_columns; xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); if (flags & XFF_HUMANIZE) xo_format_humanize(xop, xbp, &save, flags); break; case XO_STYLE_HTML: if (flags & XFF_ENCODE_ONLY) flags |= XFF_NO_OUTPUT; xo_buf_append_div(xop, "data", flags, name, nlen, value, vlen, fmt, flen, encoding, elen); break; case XO_STYLE_XML: /* * Even though we're not making output, 
we still need to * let the formatting code handle the va_arg popping. */ if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } if (pretty) xo_buf_indent(xop, -1); xo_data_append(xop, "<", 1); if (*leader) xo_data_append(xop, leader, 1); xo_data_escape(xop, name, nlen); if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) { xo_data_append(xop, xop->xo_attrs.xb_bufp, xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp); xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp; } /* * We indicate 'key' fields using the 'key' attribute. While * this is really committing the crime of mixing meta-data with * data, it's often useful. Especially when format meta-data is * difficult to come by. */ if ((flags & XFF_KEY) && XOF_ISSET(xop, XOF_KEYS)) { static char attr[] = " key=\"key\""; xo_data_append(xop, attr, sizeof(attr) - 1); } /* * Save the offset at which we'd place units. See xo_format_units. */ if (XOF_ISSET(xop, XOF_UNITS)) { XOIF_SET(xop, XOIF_UNITS_PENDING); xop->xo_units_offset = xop->xo_data.xb_curp -xop->xo_data.xb_bufp; } xo_data_append(xop, ">", 1); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); xo_data_append(xop, "", 1); if (pretty) xo_data_append(xop, "\n", 1); break; case XO_STYLE_JSON: if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } xo_stack_set_flags(xop); int first = (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) ? 
0 : 1; xo_format_prep(xop, flags); if (flags & XFF_QUOTE) quote = 1; else if (flags & XFF_NOQUOTE) quote = 0; else if (vlen != 0) quote = 1; else if (flen == 0) { quote = 0; fmt = "true"; /* JSON encodes empty tags as a boolean true */ flen = 4; } else if (xo_format_is_numeric(fmt, flen)) quote = 0; else quote = 1; if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } if (flags & XFF_LEAF_LIST) { if (!first && pretty) xo_data_append(xop, "\n", 1); if (pretty) xo_buf_indent(xop, -1); } else { if (pretty) xo_buf_indent(xop, -1); xo_data_append(xop, "\"", 1); xbp = &xop->xo_data; ssize_t off = xbp->xb_curp - xbp->xb_bufp; xo_data_escape(xop, name, nlen); if (XOF_ISSET(xop, XOF_UNDERSCORES)) { ssize_t coff = xbp->xb_curp - xbp->xb_bufp; for ( ; off < coff; off++) if (xbp->xb_bufp[off] == '-') xbp->xb_bufp[off] = '_'; } xo_data_append(xop, "\":", 2); if (pretty) xo_data_append(xop, " ", 1); } if (quote) xo_data_append(xop, "\"", 1); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); if (quote) xo_data_append(xop, "\"", 1); break; case XO_STYLE_SDPARAMS: if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } xo_data_escape(xop, name, nlen); xo_data_append(xop, "=\"", 2); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); xo_data_append(xop, "\" ", 2); break; case XO_STYLE_ENCODER: if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (flags & XFF_QUOTE) quote = 1; else if (flags & XFF_NOQUOTE) quote = 0; else if (flen == 0) { quote = 0; fmt = "true"; /* JSON encodes empty tags as a boolean true */ flen = 4; } else if (strchr("diouxXDOUeEfFgGaAcCp", fmt[flen - 1]) == NULL) quote = 1; else quote = 0; if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } ssize_t name_offset = xo_buf_offset(&xop->xo_data); xo_data_append(xop, name, nlen); xo_data_append(xop, "", 1); ssize_t value_offset = xo_buf_offset(&xop->xo_data); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); xo_data_append(xop, "", 1); xo_encoder_handle(xop, quote ? 
XO_OP_STRING : XO_OP_CONTENT, xo_buf_data(&xop->xo_data, name_offset), xo_buf_data(&xop->xo_data, value_offset), flags); xo_buf_reset(&xop->xo_data); break; } } static void xo_set_gettext_domain (xo_handle_t *xop, xo_field_info_t *xfip, const char *str, ssize_t len) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; /* Start by discarding previous domain */ if (xop->xo_gt_domain) { xo_free(xop->xo_gt_domain); xop->xo_gt_domain = NULL; } /* An empty {G:} means no domainname */ if (len == 0 && flen == 0) return; ssize_t start_offset = -1; if (len == 0 && flen != 0) { /* Need to do format the data to get the domainname from args */ start_offset = xop->xo_data.xb_curp - xop->xo_data.xb_bufp; xo_do_format_field(xop, NULL, fmt, flen, 0); ssize_t end_offset = xop->xo_data.xb_curp - xop->xo_data.xb_bufp; len = end_offset - start_offset; str = xop->xo_data.xb_bufp + start_offset; } xop->xo_gt_domain = xo_strndup(str, len); /* Reset the current buffer point to avoid emitting the name as output */ if (start_offset >= 0) xop->xo_data.xb_curp = xop->xo_data.xb_bufp + start_offset; } static void xo_format_content (xo_handle_t *xop, const char *class_name, const char *tag_name, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, xo_xff_flags_t flags) { switch (xo_style(xop)) { case XO_STYLE_TEXT: xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); break; case XO_STYLE_HTML: xo_buf_append_div(xop, class_name, flags, NULL, 0, value, vlen, fmt, flen, NULL, 0); break; case XO_STYLE_XML: case XO_STYLE_JSON: case XO_STYLE_SDPARAMS: if (tag_name) { xo_open_container_h(xop, tag_name); xo_format_value(xop, "message", 7, value, vlen, fmt, flen, NULL, 0, flags); xo_close_container_h(xop, tag_name); } else { /* * Even though we don't care about labels, we need to do * enough parsing work to skip over the right bits of xo_vap. */ xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); } break; case XO_STYLE_ENCODER: xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } } static const char *xo_color_names[] = { "default", /* XO_COL_DEFAULT */ "black", /* XO_COL_BLACK */ "red", /* XO_CLOR_RED */ "green", /* XO_COL_GREEN */ "yellow", /* XO_COL_YELLOW */ "blue", /* XO_COL_BLUE */ "magenta", /* XO_COL_MAGENTA */ "cyan", /* XO_COL_CYAN */ "white", /* XO_COL_WHITE */ NULL }; static int xo_color_find (const char *str) { int i; for (i = 0; xo_color_names[i]; i++) { if (xo_streq(xo_color_names[i], str)) return i; } return -1; } static const char *xo_effect_names[] = { "reset", /* XO_EFF_RESET */ "normal", /* XO_EFF_NORMAL */ "bold", /* XO_EFF_BOLD */ "underline", /* XO_EFF_UNDERLINE */ "inverse", /* XO_EFF_INVERSE */ NULL }; static const char *xo_effect_on_codes[] = { "0", /* XO_EFF_RESET */ "0", /* XO_EFF_NORMAL */ "1", /* XO_EFF_BOLD */ "4", /* XO_EFF_UNDERLINE */ "7", /* XO_EFF_INVERSE */ NULL }; #if 0 /* * See comment below re: joy of terminal standards. These can * be use by just adding: * + if (newp->xoc_effects & bit) * code = xo_effect_on_codes[i]; * + else * + code = xo_effect_off_codes[i]; * in xo_color_handle_text. 
*/ static const char *xo_effect_off_codes[] = { "0", /* XO_EFF_RESET */ "0", /* XO_EFF_NORMAL */ "21", /* XO_EFF_BOLD */ "24", /* XO_EFF_UNDERLINE */ "27", /* XO_EFF_INVERSE */ NULL }; #endif /* 0 */ static int xo_effect_find (const char *str) { int i; for (i = 0; xo_effect_names[i]; i++) { if (xo_streq(xo_effect_names[i], str)) return i; } return -1; } static void xo_colors_parse (xo_handle_t *xop, xo_colors_t *xocp, char *str) { if (xo_text_only()) return; char *cp, *ep, *np, *xp; ssize_t len = strlen(str); int rc; /* * Possible tokens: colors, bg-colors, effects, no-effects, "reset". */ for (cp = str, ep = cp + len - 1; cp && cp < ep; cp = np) { /* Trim leading whitespace */ while (isspace((int) *cp)) cp += 1; np = strchr(cp, ','); if (np) *np++ = '\0'; /* Trim trailing whitespace */ xp = cp + strlen(cp) - 1; while (isspace(*xp) && xp > cp) *xp-- = '\0'; if (cp[0] == 'f' && cp[1] == 'g' && cp[2] == '-') { rc = xo_color_find(cp + 3); if (rc < 0) goto unknown; xocp->xoc_col_fg = rc; } else if (cp[0] == 'b' && cp[1] == 'g' && cp[2] == '-') { rc = xo_color_find(cp + 3); if (rc < 0) goto unknown; xocp->xoc_col_bg = rc; } else if (cp[0] == 'n' && cp[1] == 'o' && cp[2] == '-') { rc = xo_effect_find(cp + 3); if (rc < 0) goto unknown; xocp->xoc_effects &= ~(1 << rc); } else { rc = xo_effect_find(cp); if (rc < 0) goto unknown; xocp->xoc_effects |= 1 << rc; switch (1 << rc) { case XO_EFF_RESET: xocp->xoc_col_fg = xocp->xoc_col_bg = 0; /* Note: not "|=" since we want to wipe out the old value */ xocp->xoc_effects = XO_EFF_RESET; break; case XO_EFF_NORMAL: xocp->xoc_effects &= ~(XO_EFF_BOLD | XO_EFF_UNDERLINE | XO_EFF_INVERSE | XO_EFF_NORMAL); break; } } continue; unknown: if (XOF_ISSET(xop, XOF_WARN)) xo_failure(xop, "unknown color/effect string detected: '%s'", cp); } } static inline int xo_colors_enabled (xo_handle_t *xop UNUSED) { #ifdef LIBXO_TEXT_ONLY return 0; #else /* LIBXO_TEXT_ONLY */ return XOF_ISSET(xop, XOF_COLOR); #endif /* LIBXO_TEXT_ONLY */ } /* * If the color map is in use (--libxo colors=xxxx), then update * the incoming foreground and background colors from the map. */ static void xo_colors_update (xo_handle_t *xop UNUSED, xo_colors_t *newp UNUSED) { #ifndef LIBXO_TEXT_ONLY xo_color_t fg = newp->xoc_col_fg; if (XOF_ISSET(xop, XOF_COLOR_MAP) && fg < XO_NUM_COLORS) fg = xop->xo_color_map_fg[fg]; /* Fetch from color map */ newp->xoc_col_fg = fg; xo_color_t bg = newp->xoc_col_bg; if (XOF_ISSET(xop, XOF_COLOR_MAP) && bg < XO_NUM_COLORS) bg = xop->xo_color_map_bg[bg]; /* Fetch from color map */ newp->xoc_col_bg = bg; #endif /* LIBXO_TEXT_ONLY */ } static void xo_colors_handle_text (xo_handle_t *xop, xo_colors_t *newp) { char buf[BUFSIZ]; char *cp = buf, *ep = buf + sizeof(buf); unsigned i, bit; xo_colors_t *oldp = &xop->xo_colors; const char *code = NULL; /* * Start the buffer with an escape. We don't want to add the '[' * now, since we let xo_effect_text_add unconditionally add the ';'. * We'll replace the first ';' with a '[' when we're done. */ *cp++ = 0x1b; /* Escape */ /* * Terminals were designed back in the age before "certainty" was * invented, when standards were more what you'd call "guidelines" * than actual rules. Anyway we can't depend on them to operate * correctly. So when display attributes are changed, we punt, * reseting them all and turning back on the ones we want to keep. * Longer, but should be completely reliable. Savvy? 
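* (In practice: dropping "bold" emits a full reset ("0") and then re-emits any colors still wanted within the same escape sequence, rather than trusting the "21" un-bold code.)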
*/ if (oldp->xoc_effects != (newp->xoc_effects & oldp->xoc_effects)) { newp->xoc_effects |= XO_EFF_RESET; oldp->xoc_effects = 0; } for (i = 0, bit = 1; xo_effect_names[i]; i++, bit <<= 1) { if ((newp->xoc_effects & bit) == (oldp->xoc_effects & bit)) continue; code = xo_effect_on_codes[i]; cp += snprintf(cp, ep - cp, ";%s", code); if (cp >= ep) return; /* Should not occur */ if (bit == XO_EFF_RESET) { /* Mark up the old value so we can detect current values as new */ oldp->xoc_effects = 0; oldp->xoc_col_fg = oldp->xoc_col_bg = XO_COL_DEFAULT; } } xo_color_t fg = newp->xoc_col_fg; if (fg != oldp->xoc_col_fg) { cp += snprintf(cp, ep - cp, ";3%u", (fg != XO_COL_DEFAULT) ? fg - 1 : 9); } xo_color_t bg = newp->xoc_col_bg; if (bg != oldp->xoc_col_bg) { cp += snprintf(cp, ep - cp, ";4%u", (bg != XO_COL_DEFAULT) ? bg - 1 : 9); } if (cp - buf != 1 && cp < ep - 3) { buf[1] = '['; /* Overwrite leading ';' */ *cp++ = 'm'; *cp = '\0'; xo_buf_append(&xop->xo_data, buf, cp - buf); } } static void xo_colors_handle_html (xo_handle_t *xop, xo_colors_t *newp) { xo_colors_t *oldp = &xop->xo_colors; /* * HTML colors are mostly trivial: fill in xo_color_buf with * a set of class tags representing the colors and effects. */ /* If nothing changed, then do nothing */ if (oldp->xoc_effects == newp->xoc_effects && oldp->xoc_col_fg == newp->xoc_col_fg && oldp->xoc_col_bg == newp->xoc_col_bg) return; unsigned i, bit; xo_buffer_t *xbp = &xop->xo_color_buf; xo_buf_reset(xbp); /* We rebuild content after each change */ for (i = 0, bit = 1; xo_effect_names[i]; i++, bit <<= 1) { if (!(newp->xoc_effects & bit)) continue; xo_buf_append_str(xbp, " effect-"); xo_buf_append_str(xbp, xo_effect_names[i]); } const char *fg = NULL; const char *bg = NULL; if (newp->xoc_col_fg != XO_COL_DEFAULT) fg = xo_color_names[newp->xoc_col_fg]; if (newp->xoc_col_bg != XO_COL_DEFAULT) bg = xo_color_names[newp->xoc_col_bg]; if (newp->xoc_effects & XO_EFF_INVERSE) { const char *tmp = fg; fg = bg; bg = tmp; if (fg == NULL) fg = "inverse"; if (bg == NULL) bg = "inverse"; } if (fg) { xo_buf_append_str(xbp, " color-fg-"); xo_buf_append_str(xbp, fg); } if (bg) { xo_buf_append_str(xbp, " color-bg-"); xo_buf_append_str(xbp, bg); } } static void xo_format_colors (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; xo_buffer_t xb; /* If the string is static and we've in an encoding style, bail */ if (vlen != 0 && xo_style_is_encoding(xop)) return; xo_buf_init(&xb); if (vlen) xo_buf_append(&xb, value, vlen); else if (flen) xo_do_format_field(xop, &xb, fmt, flen, 0); else xo_buf_append(&xb, "reset", 6); /* Default if empty */ if (xo_colors_enabled(xop)) { switch (xo_style(xop)) { case XO_STYLE_TEXT: case XO_STYLE_HTML: xo_buf_append(&xb, "", 1); xo_colors_t xoc = xop->xo_colors; xo_colors_parse(xop, &xoc, xb.xb_bufp); xo_colors_update(xop, &xoc); if (xo_style(xop) == XO_STYLE_TEXT) { /* * Text mode means emitting the colors as ANSI character * codes. This will allow people who like colors to have * colors. The issue is, of course conflicting with the * user's perfectly reasonable color scheme. Which leads * to the hell of LSCOLORS, where even app need to have * customization hooks for adjusting colors. Instead we * provide a simpler-but-still-annoying answer where one * can map colors to other colors. 
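* A typical caller writes xo_emit("{C:bold,fg-red}alert{C:}..."); the empty "{C:}" field falls back to "reset" (the default above) and restores normal output.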
*/ xo_colors_handle_text(xop, &xoc); xoc.xoc_effects &= ~XO_EFF_RESET; /* After handling it */ } else { /* * HTML output is wrapped in divs, so the color information * must appear in every div until cleared. Most pathetic. * Most unavoidable. */ xoc.xoc_effects &= ~XO_EFF_RESET; /* Before handling effects */ xo_colors_handle_html(xop, &xoc); } xop->xo_colors = xoc; break; case XO_STYLE_XML: case XO_STYLE_JSON: case XO_STYLE_SDPARAMS: case XO_STYLE_ENCODER: /* * Nothing to do; we did all that work just to clear the stack of * formatting arguments. */ break; } } xo_buf_cleanup(&xb); } static void xo_format_units (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; xo_xff_flags_t flags = xfip->xfi_flags; static char units_start_xml[] = " units=\""; static char units_start_html[] = " data-units=\""; if (!XOIF_ISSET(xop, XOIF_UNITS_PENDING)) { xo_format_content(xop, "units", NULL, value, vlen, fmt, flen, flags); return; } xo_buffer_t *xbp = &xop->xo_data; ssize_t start = xop->xo_units_offset; ssize_t stop = xbp->xb_curp - xbp->xb_bufp; if (xo_style(xop) == XO_STYLE_XML) xo_buf_append(xbp, units_start_xml, sizeof(units_start_xml) - 1); else if (xo_style(xop) == XO_STYLE_HTML) xo_buf_append(xbp, units_start_html, sizeof(units_start_html) - 1); else return; if (vlen) xo_data_escape(xop, value, vlen); else xo_do_format_field(xop, NULL, fmt, flen, flags); xo_buf_append(xbp, "\"", 1); ssize_t now = xbp->xb_curp - xbp->xb_bufp; ssize_t delta = now - stop; if (delta <= 0) { /* Strange; no output to move */ xbp->xb_curp = xbp->xb_bufp + stop; /* Reset buffer to prior state */ return; } /* * Now we're in it alright. We've need to insert the unit value * we just created into the right spot. We make a local copy, * move it and then insert our copy. We know there's room in the * buffer, since we're just moving this around. */ char *buf = alloca(delta); memcpy(buf, xbp->xb_bufp + stop, delta); memmove(xbp->xb_bufp + start + delta, xbp->xb_bufp + start, stop - start); memmove(xbp->xb_bufp + start, buf, delta); } static ssize_t xo_find_width (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; long width = 0; char *bp; char *cp; if (vlen) { bp = alloca(vlen + 1); /* Make local NUL-terminated copy of value */ memcpy(bp, value, vlen); bp[vlen] = '\0'; width = strtol(bp, &cp, 0); if (width == LONG_MIN || width == LONG_MAX || bp == cp || *cp != '\0') { width = 0; xo_failure(xop, "invalid width for anchor: '%s'", bp); } } else if (flen) { /* * We really expect the format for width to be "{:/%d}" or * "{:/%u}", so if that's the case, we just grab our width off * the argument list. But we need to avoid optimized logic if * there's a custom formatter. */ if (xop->xo_formatter == NULL && flen == 2 && strncmp("%d", fmt, flen) == 0) { if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) width = va_arg(xop->xo_vap, int); } else if (xop->xo_formatter == NULL && flen == 2 && strncmp("%u", fmt, flen) == 0) { if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) width = va_arg(xop->xo_vap, unsigned); } else { /* * So we have a format and it's not a simple one like * "{:/%d}". That means we need to format the field, * extract the value from the formatted output, and then * discard that output. 
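* For example, a width field like "{[:/%ld}" misses the "%d"/"%u" fast path above, so we format it into the data buffer, strtol() the text back out, and rewind the buffer pointer as if nothing happened.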
*/ int anchor_was_set = FALSE; xo_buffer_t *xbp = &xop->xo_data; ssize_t start_offset = xo_buf_offset(xbp); bp = xo_buf_cur(xbp); /* Save start of the string */ cp = NULL; if (XOIF_ISSET(xop, XOIF_ANCHOR)) { XOIF_CLEAR(xop, XOIF_ANCHOR); anchor_was_set = TRUE; } ssize_t rc = xo_do_format_field(xop, xbp, fmt, flen, 0); if (rc >= 0) { xo_buf_append(xbp, "", 1); /* Append a NUL */ width = strtol(bp, &cp, 0); if (width == LONG_MIN || width == LONG_MAX || bp == cp || *cp != '\0') { width = 0; xo_failure(xop, "invalid width for anchor: '%s'", bp); } } /* Reset the cur pointer to where we found it */ xbp->xb_curp = xbp->xb_bufp + start_offset; if (anchor_was_set) XOIF_SET(xop, XOIF_ANCHOR); } } return width; } static void xo_anchor_clear (xo_handle_t *xop) { XOIF_CLEAR(xop, XOIF_ANCHOR); xop->xo_anchor_offset = 0; xop->xo_anchor_columns = 0; xop->xo_anchor_min_width = 0; } /* * An anchor is a marker used to delay field width implications. * Imagine the format string "{[:10}{min:%d}/{cur:%d}/{max:%d}{:]}". * We are looking for output like " 1/4/5" * * To make this work, we record the anchor and then return to * format it when the end anchor tag is seen. */ static void xo_anchor_start (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { if (XOIF_ISSET(xop, XOIF_ANCHOR)) xo_failure(xop, "the anchor already recording is discarded"); XOIF_SET(xop, XOIF_ANCHOR); xo_buffer_t *xbp = &xop->xo_data; xop->xo_anchor_offset = xbp->xb_curp - xbp->xb_bufp; xop->xo_anchor_columns = 0; /* * Now we find the width, if possible. If it's not there, * we'll get it on the end anchor. */ xop->xo_anchor_min_width = xo_find_width(xop, xfip, value, vlen); } static void xo_anchor_stop (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { if (!XOIF_ISSET(xop, XOIF_ANCHOR)) { xo_failure(xop, "no start anchor"); return; } XOIF_CLEAR(xop, XOIF_UNITS_PENDING); ssize_t width = xo_find_width(xop, xfip, value, vlen); if (width == 0) width = xop->xo_anchor_min_width; if (width == 0) /* No width given; nothing to do */ goto done; xo_buffer_t *xbp = &xop->xo_data; ssize_t start = xop->xo_anchor_offset; ssize_t stop = xbp->xb_curp - xbp->xb_bufp; ssize_t abswidth = (width > 0) ? width : -width; ssize_t blen = abswidth - xop->xo_anchor_columns; if (blen <= 0) /* Already over width */ goto done; if (abswidth > XO_MAX_ANCHOR_WIDTH) { xo_failure(xop, "width over %u are not supported", XO_MAX_ANCHOR_WIDTH); goto done; } /* Make a suitable padding field and emit it */ char *buf = alloca(blen); memset(buf, ' ', blen); xo_format_content(xop, "padding", NULL, buf, blen, NULL, 0, 0); if (width < 0) /* Already left justified */ goto done; ssize_t now = xbp->xb_curp - xbp->xb_bufp; ssize_t delta = now - stop; if (delta <= 0) /* Strange; no output to move */ goto done; /* * Now we're in it alright. We've need to insert the padding data * we just created (which might be an HTML
or text) before * the formatted data. We make a local copy, move it and then * insert our copy. We know there's room in the buffer, since * we're just moving this around. */ if (delta > blen) buf = alloca(delta); /* Expand buffer if needed */ memcpy(buf, xbp->xb_bufp + stop, delta); memmove(xbp->xb_bufp + start + delta, xbp->xb_bufp + start, stop - start); memmove(xbp->xb_bufp + start, buf, delta); done: xo_anchor_clear(xop); } static const char * xo_class_name (int ftype) { switch (ftype) { case 'D': return "decoration"; case 'E': return "error"; case 'L': return "label"; case 'N': return "note"; case 'P': return "padding"; case 'W': return "warning"; } return NULL; } static const char * xo_tag_name (int ftype) { switch (ftype) { case 'E': return "__error"; case 'W': return "__warning"; } return NULL; } static int xo_role_wants_default_format (int ftype) { switch (ftype) { /* These roles can be completely empty and/or without formatting */ case 'C': case 'G': case '[': case ']': return 0; } return 1; } static xo_mapping_t xo_role_names[] = { { 'C', "color" }, { 'D', "decoration" }, { 'E', "error" }, { 'L', "label" }, { 'N', "note" }, { 'P', "padding" }, { 'T', "title" }, { 'U', "units" }, { 'V', "value" }, { 'W', "warning" }, { '[', "start-anchor" }, { ']', "stop-anchor" }, { 0, NULL } }; #define XO_ROLE_EBRACE '{' /* Escaped braces */ #define XO_ROLE_TEXT '+' #define XO_ROLE_NEWLINE '\n' static xo_mapping_t xo_modifier_names[] = { { XFF_ARGUMENT, "argument" }, { XFF_COLON, "colon" }, { XFF_COMMA, "comma" }, { XFF_DISPLAY_ONLY, "display" }, { XFF_ENCODE_ONLY, "encoding" }, { XFF_GT_FIELD, "gettext" }, { XFF_HUMANIZE, "humanize" }, { XFF_HUMANIZE, "hn" }, { XFF_HN_SPACE, "hn-space" }, { XFF_HN_DECIMAL, "hn-decimal" }, { XFF_HN_1000, "hn-1000" }, { XFF_KEY, "key" }, { XFF_LEAF_LIST, "leaf-list" }, { XFF_LEAF_LIST, "list" }, { XFF_NOQUOTE, "no-quotes" }, { XFF_NOQUOTE, "no-quote" }, { XFF_GT_PLURAL, "plural" }, { XFF_QUOTE, "quotes" }, { XFF_QUOTE, "quote" }, { XFF_TRIM_WS, "trim" }, { XFF_WS, "white" }, { 0, NULL } }; #ifdef NOT_NEEDED_YET static xo_mapping_t xo_modifier_short_names[] = { { XFF_COLON, "c" }, { XFF_DISPLAY_ONLY, "d" }, { XFF_ENCODE_ONLY, "e" }, { XFF_GT_FIELD, "g" }, { XFF_HUMANIZE, "h" }, { XFF_KEY, "k" }, { XFF_LEAF_LIST, "l" }, { XFF_NOQUOTE, "n" }, { XFF_GT_PLURAL, "p" }, { XFF_QUOTE, "q" }, { XFF_TRIM_WS, "t" }, { XFF_WS, "w" }, { 0, NULL } }; #endif /* NOT_NEEDED_YET */ static int xo_count_fields (xo_handle_t *xop UNUSED, const char *fmt) { int rc = 1; const char *cp; for (cp = fmt; *cp; cp++) if (*cp == '{' || *cp == '\n') rc += 1; return rc * 2 + 1; } /* * The field format is: * '{' modifiers ':' content [ '/' print-fmt [ '/' encode-fmt ]] '}' * Roles are optional and include the following field types: * 'D': decoration; something non-text and non-data (colons, commmas) * 'E': error message * 'G': gettext() the entire string; optional domainname as content * 'L': label; text preceding data * 'N': note; text following data * 'P': padding; whitespace * 'T': Title, where 'content' is a column title * 'U': Units, where 'content' is the unit label * 'V': value, where 'content' is the name of the field (the default) * 'W': warning message * '[': start a section of anchored text * ']': end a section of anchored text * The following modifiers are also supported: * 'a': content is provided via argument (const char *), not descriptor * 'c': flag: emit a colon after the label * 'd': field is only emitted for display styles (text and html) * 'e': field is only emitted for 
encoding styles (xml and json) * 'g': gettext() the field * 'h': humanize a numeric value (only for display styles) * 'k': this field is a key, suitable for XPath predicates * 'l': a leaf-list, a simple list of values * 'n': no quotes around this field * 'p': the field has plural gettext semantics (ngettext) * 'q': add quotes around this field * 't': trim whitespace around the value * 'w': emit a blank after the label * The print-fmt and encode-fmt strings is the printf-style formating * for this data. JSON and XML will use the encoding-fmt, if present. * If the encode-fmt is not provided, it defaults to the print-fmt. * If the print-fmt is not provided, it defaults to 's'. */ static const char * xo_parse_roles (xo_handle_t *xop, const char *fmt, const char *basep, xo_field_info_t *xfip) { const char *sp; unsigned ftype = 0; xo_xff_flags_t flags = 0; uint8_t fnum = 0; for (sp = basep; sp && *sp; sp++) { if (*sp == ':' || *sp == '/' || *sp == '}') break; if (*sp == '\\') { if (sp[1] == '\0') { xo_failure(xop, "backslash at the end of string"); return NULL; } /* Anything backslashed is ignored */ sp += 1; continue; } if (*sp == ',') { const char *np; for (np = ++sp; *np; np++) if (*np == ':' || *np == '/' || *np == '}' || *np == ',') break; ssize_t slen = np - sp; if (slen > 0) { xo_xff_flags_t value; value = xo_name_lookup(xo_role_names, sp, slen); if (value) ftype = value; else { value = xo_name_lookup(xo_modifier_names, sp, slen); if (value) flags |= value; else xo_failure(xop, "unknown keyword ignored: '%.*s'", slen, sp); } } sp = np - 1; continue; } switch (*sp) { case 'C': case 'D': case 'E': case 'G': case 'L': case 'N': case 'P': case 'T': case 'U': case 'V': case 'W': case '[': case ']': if (ftype != 0) { xo_failure(xop, "field descriptor uses multiple types: '%s'", xo_printable(fmt)); return NULL; } ftype = *sp; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': fnum = (fnum * 10) + (*sp - '0'); break; case 'a': flags |= XFF_ARGUMENT; break; case 'c': flags |= XFF_COLON; break; case 'd': flags |= XFF_DISPLAY_ONLY; break; case 'e': flags |= XFF_ENCODE_ONLY; break; case 'g': flags |= XFF_GT_FIELD; break; case 'h': flags |= XFF_HUMANIZE; break; case 'k': flags |= XFF_KEY; break; case 'l': flags |= XFF_LEAF_LIST; break; case 'n': flags |= XFF_NOQUOTE; break; case 'p': flags |= XFF_GT_PLURAL; break; case 'q': flags |= XFF_QUOTE; break; case 't': flags |= XFF_TRIM_WS; break; case 'w': flags |= XFF_WS; break; default: xo_failure(xop, "field descriptor uses unknown modifier: '%s'", xo_printable(fmt)); /* * No good answer here; a bad format will likely * mean a core file. We just return and hope * the caller notices there's no output, and while * that seems, well, bad, there's nothing better. */ return NULL; } if (ftype == 'N' || ftype == 'U') { if (flags & XFF_COLON) { xo_failure(xop, "colon modifier on 'N' or 'U' field ignored: " "'%s'", xo_printable(fmt)); flags &= ~XFF_COLON; } } } xfip->xfi_flags = flags; xfip->xfi_ftype = ftype ?: 'V'; xfip->xfi_fnum = fnum; return sp; } /* * Number any remaining fields that need numbers. Note that some * field types (text, newline, escaped braces) never get numbers. 
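* * For example, in "{G:}{2:name} {:size/%d}", the "{G:}" role is skipped, "{2:name}" keeps its explicit number, and the unnumbered "{:size}" field is assigned the lowest unused number, 1.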
*/ static void xo_gettext_finish_numbering_fields (xo_handle_t *xop UNUSED, const char *fmt UNUSED, xo_field_info_t *fields) { xo_field_info_t *xfip; unsigned fnum, max_fields; uint64_t bits = 0; const uint64_t one = 1; /* Avoid "1ULL" */ /* First make a list of add the explicitly used bits */ for (xfip = fields, fnum = 0; xfip->xfi_ftype; xfip++) { switch (xfip->xfi_ftype) { case XO_ROLE_NEWLINE: /* Don't get numbered */ case XO_ROLE_TEXT: case XO_ROLE_EBRACE: case 'G': continue; } fnum += 1; if (fnum >= 63) break; if (xfip->xfi_fnum) bits |= one << xfip->xfi_fnum; } max_fields = fnum; for (xfip = fields, fnum = 0; xfip->xfi_ftype; xfip++) { switch (xfip->xfi_ftype) { case XO_ROLE_NEWLINE: /* Don't get numbered */ case XO_ROLE_TEXT: case XO_ROLE_EBRACE: case 'G': continue; } if (xfip->xfi_fnum != 0) continue; /* Find the next unassigned field */ for (fnum++; bits & (one << fnum); fnum++) continue; if (fnum > max_fields) break; xfip->xfi_fnum = fnum; /* Mark the field number */ bits |= one << fnum; /* Mark it used */ } } /* * The format string uses field numbers, so we need to whiffle through it * and make sure everything's sane and lovely. */ static int xo_parse_field_numbers (xo_handle_t *xop, const char *fmt, xo_field_info_t *fields, unsigned num_fields) { xo_field_info_t *xfip; unsigned field, fnum; uint64_t bits = 0; const uint64_t one = 1; /* Avoid 1ULL */ for (xfip = fields, field = 0; field < num_fields; xfip++, field++) { /* Fields default to 1:1 with natural position */ if (xfip->xfi_fnum == 0) xfip->xfi_fnum = field + 1; else if (xfip->xfi_fnum > num_fields) { xo_failure(xop, "field number exceeds number of fields: '%s'", fmt); return -1; } fnum = xfip->xfi_fnum - 1; /* Move to zero origin */ if (fnum < 64) { /* Only test what fits */ if (bits & (one << fnum)) { xo_failure(xop, "field number %u reused: '%s'", xfip->xfi_fnum, fmt); return -1; } bits |= one << fnum; } } return 0; } static int xo_parse_fields (xo_handle_t *xop, xo_field_info_t *fields, unsigned num_fields, const char *fmt) { const char *cp, *sp, *ep, *basep; unsigned field = 0; xo_field_info_t *xfip = fields; unsigned seen_fnum = 0; for (cp = fmt; *cp && field < num_fields; field++, xfip++) { xfip->xfi_start = cp; if (*cp == '\n') { xfip->xfi_ftype = XO_ROLE_NEWLINE; xfip->xfi_len = 1; cp += 1; continue; } if (*cp != '{') { /* Normal text */ for (sp = cp; *sp; sp++) { if (*sp == '{' || *sp == '\n') break; } xfip->xfi_ftype = XO_ROLE_TEXT; xfip->xfi_content = cp; xfip->xfi_clen = sp - cp; xfip->xfi_next = sp; cp = sp; continue; } if (cp[1] == '{') { /* Start of {{escaped braces}} */ xfip->xfi_start = cp + 1; /* Start at second brace */ xfip->xfi_ftype = XO_ROLE_EBRACE; cp += 2; /* Skip over _both_ characters */ for (sp = cp; *sp; sp++) { if (*sp == '}' && sp[1] == '}') break; } if (*sp == '\0') { xo_failure(xop, "missing closing '}}': '%s'", xo_printable(fmt)); return -1; } xfip->xfi_len = sp - xfip->xfi_start + 1; /* Move along the string, but don't run off the end */ if (*sp == '}' && sp[1] == '}') /* Paranoid; must be true */ sp += 2; cp = sp; xfip->xfi_next = cp; continue; } /* We are looking at the start of a field definition */ xfip->xfi_start = basep = cp + 1; const char *format = NULL; ssize_t flen = 0; /* Looking at roles and modifiers */ sp = xo_parse_roles(xop, fmt, basep, xfip); if (sp == NULL) { /* xo_failure has already been called */ return -1; } if (xfip->xfi_fnum) seen_fnum = 1; /* Looking at content */ if (*sp == ':') { for (ep = ++sp; *sp; sp++) { if (*sp == '}' || *sp == '/') break; if (*sp == 
'\\') { if (sp[1] == '\0') { xo_failure(xop, "backslash at the end of string"); return -1; } sp += 1; continue; } } if (ep != sp) { xfip->xfi_clen = sp - ep; xfip->xfi_content = ep; } } else { xo_failure(xop, "missing content (':'): '%s'", xo_printable(fmt)); return -1; } /* Looking at main (display) format */ if (*sp == '/') { for (ep = ++sp; *sp; sp++) { if (*sp == '}' || *sp == '/') break; if (*sp == '\\') { if (sp[1] == '\0') { xo_failure(xop, "backslash at the end of string"); return -1; } sp += 1; continue; } } flen = sp - ep; format = ep; } /* Looking at encoding format */ if (*sp == '/') { for (ep = ++sp; *sp; sp++) { if (*sp == '}') break; } xfip->xfi_encoding = ep; xfip->xfi_elen = sp - ep; } if (*sp != '}') { xo_failure(xop, "missing closing '}': %s", xo_printable(fmt)); return -1; } xfip->xfi_len = sp - xfip->xfi_start; xfip->xfi_next = ++sp; /* If we have content, then we have a default format */ if (xfip->xfi_clen || format || (xfip->xfi_flags & XFF_ARGUMENT)) { if (format) { xfip->xfi_format = format; xfip->xfi_flen = flen; } else if (xo_role_wants_default_format(xfip->xfi_ftype)) { xfip->xfi_format = xo_default_format; xfip->xfi_flen = 2; } } cp = sp; } int rc = 0; /* * If we saw a field number on at least one field, then we need * to enforce some rules and/or guidelines. */ if (seen_fnum) rc = xo_parse_field_numbers(xop, fmt, fields, field); return rc; } /* * We are passed a pointer to a format string just past the "{G:}" * field. We build a simplified version of the format string. */ static int xo_gettext_simplify_format (xo_handle_t *xop UNUSED, xo_buffer_t *xbp, xo_field_info_t *fields, int this_field, const char *fmt UNUSED, xo_simplify_field_func_t field_cb) { unsigned ftype; xo_xff_flags_t flags; int field = this_field + 1; xo_field_info_t *xfip; char ch; for (xfip = &fields[field]; xfip->xfi_ftype; xfip++, field++) { ftype = xfip->xfi_ftype; flags = xfip->xfi_flags; if ((flags & XFF_GT_FIELD) && xfip->xfi_content && ftype != 'V') { if (field_cb) field_cb(xfip->xfi_content, xfip->xfi_clen, (flags & XFF_GT_PLURAL) ? 1 : 0); } switch (ftype) { case 'G': /* Ignore gettext roles */ break; case XO_ROLE_NEWLINE: xo_buf_append(xbp, "\n", 1); break; case XO_ROLE_EBRACE: xo_buf_append(xbp, "{", 1); xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen); xo_buf_append(xbp, "}", 1); break; case XO_ROLE_TEXT: xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen); break; default: xo_buf_append(xbp, "{", 1); if (ftype != 'V') { ch = ftype; xo_buf_append(xbp, &ch, 1); } unsigned fnum = xfip->xfi_fnum ?: 0; if (fnum) { char num[12]; /* Field numbers are origin 1, not 0, following printf(3) */ snprintf(num, sizeof(num), "%u", fnum); xo_buf_append(xbp, num, strlen(num)); } xo_buf_append(xbp, ":", 1); xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen); xo_buf_append(xbp, "}", 1); } } xo_buf_append(xbp, "", 1); return 0; } void xo_dump_fields (xo_field_info_t *); /* Fake prototype for debug function */ void xo_dump_fields (xo_field_info_t *fields) { xo_field_info_t *xfip; for (xfip = fields; xfip->xfi_ftype; xfip++) { printf("%lu(%u): %lx [%c/%u] [%.*s] [%.*s] [%.*s]\n", (unsigned long) (xfip - fields), xfip->xfi_fnum, (unsigned long) xfip->xfi_flags, isprint((int) xfip->xfi_ftype) ? 
xfip->xfi_ftype : ' ', xfip->xfi_ftype, (int) xfip->xfi_clen, xfip->xfi_content ?: "", (int) xfip->xfi_flen, xfip->xfi_format ?: "", (int) xfip->xfi_elen, xfip->xfi_encoding ?: ""); } } #ifdef HAVE_GETTEXT /* * Find the field that matches the given field number */ static xo_field_info_t * xo_gettext_find_field (xo_field_info_t *fields, unsigned fnum) { xo_field_info_t *xfip; for (xfip = fields; xfip->xfi_ftype; xfip++) if (xfip->xfi_fnum == fnum) return xfip; return NULL; } /* * At this point, we need to consider if the fields have been reordered, * such as "The {:adjective} {:noun}" to "La {:noun} {:adjective}". * * We need to rewrite the new_fields using the old fields order, * so that we can render the message using the arguments as they * appear on the stack. It's a lot of work, but we don't really * want to (eventually) fall into the standard printf code which * means using the arguments straight (and in order) from the * varargs we were originally passed. */ static void xo_gettext_rewrite_fields (xo_handle_t *xop UNUSED, xo_field_info_t *fields, unsigned max_fields) { xo_field_info_t tmp[max_fields]; bzero(tmp, max_fields * sizeof(tmp[0])); unsigned fnum = 0; xo_field_info_t *newp, *outp, *zp; for (newp = fields, outp = tmp; newp->xfi_ftype; newp++, outp++) { switch (newp->xfi_ftype) { case XO_ROLE_NEWLINE: /* Don't get numbered */ case XO_ROLE_TEXT: case XO_ROLE_EBRACE: case 'G': *outp = *newp; outp->xfi_renum = 0; continue; } zp = xo_gettext_find_field(fields, ++fnum); if (zp == NULL) { /* Should not occur */ *outp = *newp; outp->xfi_renum = 0; continue; } *outp = *zp; outp->xfi_renum = newp->xfi_fnum; } memcpy(fields, tmp, max_fields * sizeof(tmp[0])); } /* * We've got two lists of fields, the old list from the original * format string and the new one from the parsed gettext reply. The * new list has the localized words, where the old list has the * formatting information. We need to combine them into a single list * (the new list). * * If the list needs to be reordered, then we've got more serious work * to do. */ static int xo_gettext_combine_formats (xo_handle_t *xop, const char *fmt UNUSED, const char *gtfmt, xo_field_info_t *old_fields, xo_field_info_t *new_fields, unsigned new_max_fields, int *reorderedp) { int reordered = 0; xo_field_info_t *newp, *oldp, *startp = old_fields; xo_gettext_finish_numbering_fields(xop, fmt, old_fields); for (newp = new_fields; newp->xfi_ftype; newp++) { switch (newp->xfi_ftype) { case XO_ROLE_NEWLINE: case XO_ROLE_TEXT: case XO_ROLE_EBRACE: continue; case 'V': for (oldp = startp; oldp->xfi_ftype; oldp++) { if (oldp->xfi_ftype != 'V') continue; if (newp->xfi_clen != oldp->xfi_clen || strncmp(newp->xfi_content, oldp->xfi_content, oldp->xfi_clen) != 0) { reordered = 1; continue; } startp = oldp + 1; break; } /* Didn't find it on the first pass (starting from start) */ if (oldp->xfi_ftype == 0) { for (oldp = old_fields; oldp < startp; oldp++) { if (oldp->xfi_ftype != 'V') continue; if (newp->xfi_clen != oldp->xfi_clen) continue; if (strncmp(newp->xfi_content, oldp->xfi_content, oldp->xfi_clen) != 0) continue; reordered = 1; break; } if (oldp == startp) { /* Field not found */ xo_failure(xop, "post-gettext format can't find field " "'%.*s' in format '%s'", newp->xfi_clen, newp->xfi_content, xo_printable(gtfmt)); return -1; } } break; default: /* * Other fields don't have names for us to use, so if * the types aren't the same, then we'll have to assume * the original field is a match. 
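* * For example, a decoration field ("{D:}") in the translated string is paired with the next decoration of the same type in the original (the search does not cross value fields), and the original's format and flags are carried over at "copy_it" below.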
*/ for (oldp = startp; oldp->xfi_ftype; oldp++) { if (oldp->xfi_ftype == 'V') /* Can't go past these */ break; if (oldp->xfi_ftype == newp->xfi_ftype) goto copy_it; /* Assumably we have a match */ } continue; } /* * Found a match; copy over appropriate fields */ copy_it: newp->xfi_flags = oldp->xfi_flags; newp->xfi_fnum = oldp->xfi_fnum; newp->xfi_format = oldp->xfi_format; newp->xfi_flen = oldp->xfi_flen; newp->xfi_encoding = oldp->xfi_encoding; newp->xfi_elen = oldp->xfi_elen; } *reorderedp = reordered; if (reordered) { xo_gettext_finish_numbering_fields(xop, fmt, new_fields); xo_gettext_rewrite_fields(xop, new_fields, new_max_fields); } return 0; } /* * We don't want to make gettext() calls here with a complete format * string, since that means changing a flag would mean a * labor-intensive re-translation expense. Instead we build a * simplified form with a reduced level of detail, perform a lookup on * that string and then re-insert the formatting info. * * So something like: * xo_emit("{G:}close {:fd/%ld} returned {g:error/%m} {:test/%6.6s}\n", ...) * would have a lookup string of: * "close {:fd} returned {:error} {:test}\n" * * We also need to handle reordering of fields, where the gettext() * reply string uses fields in a different order than the original * format string: * "cluse-a {:fd} retoorned {:test}. Bork {:error} Bork. Bork.\n" * If we have to reorder fields within the message, then things get * complicated. See xo_gettext_rewrite_fields. * * Summary: i18n aighn't cheap. */ static const char * xo_gettext_build_format (xo_handle_t *xop, xo_field_info_t *fields, int this_field, const char *fmt, char **new_fmtp) { if (xo_style_is_encoding(xop)) goto bail; xo_buffer_t xb; xo_buf_init(&xb); if (xo_gettext_simplify_format(xop, &xb, fields, this_field, fmt, NULL)) goto bail2; const char *gtfmt = xo_dgettext(xop, xb.xb_bufp); if (gtfmt == NULL || gtfmt == fmt || xo_streq(gtfmt, fmt)) goto bail2; char *new_fmt = xo_strndup(gtfmt, -1); if (new_fmt == NULL) goto bail2; xo_buf_cleanup(&xb); *new_fmtp = new_fmt; return new_fmt; bail2: xo_buf_cleanup(&xb); bail: *new_fmtp = NULL; return fmt; } static void xo_gettext_rebuild_content (xo_handle_t *xop, xo_field_info_t *fields, ssize_t *fstart, unsigned min_fstart, ssize_t *fend, unsigned max_fend) { xo_field_info_t *xfip; char *buf; ssize_t base = fstart[min_fstart]; ssize_t blen = fend[max_fend] - base; xo_buffer_t *xbp = &xop->xo_data; if (blen == 0) return; buf = xo_realloc(NULL, blen); if (buf == NULL) return; memcpy(buf, xbp->xb_bufp + fstart[min_fstart], blen); /* Copy our data */ unsigned field = min_fstart, len, fnum; ssize_t soff, doff = base; xo_field_info_t *zp; /* * Be aware there are two competing views of "field number": we * want the user to think in terms of "The {1:size}" where {G:}, * newlines, escaped braces, and text don't have numbers. But there * is also the internal view, where we have an array of * xo_field_info_t and every field has an index. fnum, fstart[] * and fend[] are the latter, but xfi_renum is the former. */ for (xfip = fields + field; xfip->xfi_ftype; xfip++, field++) { fnum = field; if (xfip->xfi_renum) { zp = xo_gettext_find_field(fields, xfip->xfi_renum); fnum = zp ?
zp - fields : field; } soff = fstart[fnum]; len = fend[fnum] - soff; if (len > 0) { soff -= base; memcpy(xbp->xb_bufp + doff, buf + soff, len); doff += len; } } xo_free(buf); } #else /* HAVE_GETTEXT */ static const char * xo_gettext_build_format (xo_handle_t *xop UNUSED, xo_field_info_t *fields UNUSED, int this_field UNUSED, const char *fmt UNUSED, char **new_fmtp) { *new_fmtp = NULL; return fmt; } static int xo_gettext_combine_formats (xo_handle_t *xop UNUSED, const char *fmt UNUSED, const char *gtfmt UNUSED, xo_field_info_t *old_fields UNUSED, xo_field_info_t *new_fields UNUSED, unsigned new_max_fields UNUSED, int *reorderedp UNUSED) { return -1; } static void xo_gettext_rebuild_content (xo_handle_t *xop UNUSED, xo_field_info_t *fields UNUSED, ssize_t *fstart UNUSED, unsigned min_fstart UNUSED, ssize_t *fend UNUSED, unsigned max_fend UNUSED) { return; } #endif /* HAVE_GETTEXT */ /* * Emit a set of fields. This is really the core of libxo. */ static ssize_t xo_do_emit_fields (xo_handle_t *xop, xo_field_info_t *fields, unsigned max_fields, const char *fmt) { int gettext_inuse = 0; int gettext_changed = 0; int gettext_reordered = 0; unsigned ftype; xo_xff_flags_t flags; xo_field_info_t *new_fields = NULL; xo_field_info_t *xfip; unsigned field; ssize_t rc = 0; int flush = XOF_ISSET(xop, XOF_FLUSH); int flush_line = XOF_ISSET(xop, XOF_FLUSH_LINE); char *new_fmt = NULL; if (XOIF_ISSET(xop, XOIF_REORDER) || xo_style(xop) == XO_STYLE_ENCODER) flush_line = 0; /* * Some overhead for gettext; if the fields in the msgstr returned * by gettext are reordered, then we need to record start and end * for each field. We'll go ahead and render the fields in the * normal order, but later we can then reconstruct the reordered * fields using these fstart/fend values. */ unsigned flimit = max_fields * 2; /* Pessimistic limit */ unsigned min_fstart = flimit - 1; unsigned max_fend = 0; /* Highest recorded fend[] entry */ ssize_t fstart[flimit]; bzero(fstart, flimit * sizeof(fstart[0])); ssize_t fend[flimit]; bzero(fend, flimit * sizeof(fend[0])); for (xfip = fields, field = 0; field < max_fields && xfip->xfi_ftype; xfip++, field++) { ftype = xfip->xfi_ftype; flags = xfip->xfi_flags; /* Record field start offset */ if (gettext_reordered) { fstart[field] = xo_buf_offset(&xop->xo_data); if (min_fstart > field) min_fstart = field; } const char *content = xfip->xfi_content; ssize_t clen = xfip->xfi_clen; if (flags & XFF_ARGUMENT) { /* * Argument flag means the content isn't given in the descriptor, * but as a UTF-8 string ('const char *') argument in xo_vap. */ content = va_arg(xop->xo_vap, char *); clen = content ? strlen(content) : 0; } if (ftype == XO_ROLE_NEWLINE) { xo_line_close(xop); if (flush_line && xo_flush_h(xop) < 0) return -1; goto bottom; } else if (ftype == XO_ROLE_EBRACE) { xo_format_text(xop, xfip->xfi_start, xfip->xfi_len); goto bottom; } else if (ftype == XO_ROLE_TEXT) { /* Normal text */ xo_format_text(xop, xfip->xfi_content, xfip->xfi_clen); goto bottom; } /* * Notes and units need the 'w' flag handled before the content. 
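* * For example, "{:length/%d}{Nw:meters}" should render as "10 meters"; the blank belongs in front of the note, so it is emitted here as padding and XFF_WS is cleared to prevent the usual trailing-blank handling later in the loop.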
*/ if (ftype == 'N' || ftype == 'U') { if (flags & XFF_WS) { xo_format_content(xop, "padding", NULL, " ", 1, NULL, 0, flags); flags &= ~XFF_WS; /* Prevent later handling of this flag */ } } if (ftype == 'V') xo_format_value(xop, content, clen, NULL, 0, xfip->xfi_format, xfip->xfi_flen, xfip->xfi_encoding, xfip->xfi_elen, flags); else if (ftype == '[') xo_anchor_start(xop, xfip, content, clen); else if (ftype == ']') xo_anchor_stop(xop, xfip, content, clen); else if (ftype == 'C') xo_format_colors(xop, xfip, content, clen); else if (ftype == 'G') { /* * A {G:domain} field; disect the domain name and translate * the remaining portion of the input string. If the user * didn't put the {G:} at the start of the format string, then * assumably they just want us to translate the rest of it. * Since gettext returns strings in a static buffer, we make * a copy in new_fmt. */ xo_set_gettext_domain(xop, xfip, content, clen); if (!gettext_inuse) { /* Only translate once */ gettext_inuse = 1; if (new_fmt) { xo_free(new_fmt); new_fmt = NULL; } xo_gettext_build_format(xop, fields, field, xfip->xfi_next, &new_fmt); if (new_fmt) { gettext_changed = 1; unsigned new_max_fields = xo_count_fields(xop, new_fmt); if (++new_max_fields < max_fields) new_max_fields = max_fields; /* Leave a blank slot at the beginning */ ssize_t sz = (new_max_fields + 1) * sizeof(xo_field_info_t); new_fields = alloca(sz); bzero(new_fields, sz); if (!xo_parse_fields(xop, new_fields + 1, new_max_fields, new_fmt)) { gettext_reordered = 0; if (!xo_gettext_combine_formats(xop, fmt, new_fmt, fields, new_fields + 1, new_max_fields, &gettext_reordered)) { if (gettext_reordered) { if (XOF_ISSET(xop, XOF_LOG_GETTEXT)) xo_failure(xop, "gettext finds reordered " "fields in '%s' and '%s'", xo_printable(fmt), xo_printable(new_fmt)); flush_line = 0; /* Must keep at content */ XOIF_SET(xop, XOIF_REORDER); } field = -1; /* Will be incremented at top of loop */ xfip = new_fields; max_fields = new_max_fields; } } } } continue; } else if (clen || xfip->xfi_format) { const char *class_name = xo_class_name(ftype); if (class_name) xo_format_content(xop, class_name, xo_tag_name(ftype), content, clen, xfip->xfi_format, xfip->xfi_flen, flags); else if (ftype == 'T') xo_format_title(xop, xfip, content, clen); else if (ftype == 'U') xo_format_units(xop, xfip, content, clen); else xo_failure(xop, "unknown field type: '%c'", ftype); } if (flags & XFF_COLON) xo_format_content(xop, "decoration", NULL, ":", 1, NULL, 0, 0); if (flags & XFF_WS) xo_format_content(xop, "padding", NULL, " ", 1, NULL, 0, 0); bottom: /* Record the end-of-field offset */ if (gettext_reordered) { fend[field] = xo_buf_offset(&xop->xo_data); max_fend = field; } } if (gettext_changed && gettext_reordered) { /* Final step: rebuild the content using the rendered fields */ xo_gettext_rebuild_content(xop, new_fields + 1, fstart, min_fstart, fend, max_fend); } XOIF_CLEAR(xop, XOIF_REORDER); /* * If we've got enough data, flush it. */ if (xo_buf_offset(&xop->xo_data) > XO_BUF_HIGH_WATER) flush = 1; /* If we don't have an anchor, write the text out */ if (flush && !XOIF_ISSET(xop, XOIF_ANCHOR)) { if (xo_flush_h(xop) < 0) rc = -1; } if (new_fmt) xo_free(new_fmt); /* * We've carried the gettext domainname inside our handle just for * convenience, but we need to ensure it doesn't survive across * xo_emit calls. */ if (xop->xo_gt_domain) { xo_free(xop->xo_gt_domain); xop->xo_gt_domain = NULL; } return (rc < 0) ? 
rc : xop->xo_columns; } /* * Parse and emit a set of fields */ static int xo_do_emit (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt) { xop->xo_columns = 0; /* Always reset it */ xop->xo_errno = errno; /* Save for "%m" */ if (fmt == NULL) return 0; unsigned max_fields; xo_field_info_t *fields = NULL; /* Adjust XOEF_RETAIN based on global flags */ if (XOF_ISSET(xop, XOF_RETAIN_ALL)) flags |= XOEF_RETAIN; if (XOF_ISSET(xop, XOF_RETAIN_NONE)) flags &= ~XOEF_RETAIN; /* * Check for 'retain' flag, telling us to retain the field * information. If we've already saved it, then we can avoid * re-parsing the format string. */ if (!(flags & XOEF_RETAIN) || xo_retain_find(fmt, &fields, &max_fields) != 0 || fields == NULL) { /* Nothing retained; parse the format string */ max_fields = xo_count_fields(xop, fmt); fields = alloca(max_fields * sizeof(fields[0])); bzero(fields, max_fields * sizeof(fields[0])); if (xo_parse_fields(xop, fields, max_fields, fmt)) return -1; /* Warning already displayed */ if (flags & XOEF_RETAIN) { /* Retain the info */ xo_retain_add(fmt, fields, max_fields); } } return xo_do_emit_fields(xop, fields, max_fields, fmt); } /* * Rebuild a format string in a gettext-friendly format. This function * is exposed so that tools can perform this function. See xo(1). */ char * xo_simplify_format (xo_handle_t *xop, const char *fmt, int with_numbers, xo_simplify_field_func_t field_cb) { xop = xo_default(xop); xop->xo_columns = 0; /* Always reset it */ xop->xo_errno = errno; /* Save for "%m" */ unsigned max_fields = xo_count_fields(xop, fmt); xo_field_info_t fields[max_fields]; bzero(fields, max_fields * sizeof(fields[0])); if (xo_parse_fields(xop, fields, max_fields, fmt)) return NULL; /* Warning already displayed */ xo_buffer_t xb; xo_buf_init(&xb); if (with_numbers) xo_gettext_finish_numbering_fields(xop, fmt, fields); if (xo_gettext_simplify_format(xop, &xb, fields, -1, fmt, field_cb)) return NULL; return xb.xb_bufp; } xo_ssize_t xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap) { ssize_t rc; xop = xo_default(xop); va_copy(xop->xo_vap, vap); rc = xo_do_emit(xop, 0, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_h (xo_handle_t *xop, const char *fmt, ...) { ssize_t rc; xop = xo_default(xop); va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, 0, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit (const char *fmt, ...) { xo_handle_t *xop = xo_default(NULL); ssize_t rc; va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, 0, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_hvf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, va_list vap) { ssize_t rc; xop = xo_default(xop); va_copy(xop->xo_vap, vap); rc = xo_do_emit(xop, flags, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_hf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...) { ssize_t rc; xop = xo_default(xop); va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, flags, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_f (xo_emit_flags_t flags, const char *fmt, ...)
{ xo_handle_t *xop = xo_default(NULL); ssize_t rc; va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, flags, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } /* * Emit a single field by providing the info information typically provided * inside the field description (role, modifiers, and formats). This is * a convenience function to avoid callers using snprintf to build field * descriptions. */ xo_ssize_t xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, va_list vap) { ssize_t rc; xop = xo_default(xop); if (rolmod == NULL) rolmod = "V"; xo_field_info_t xfi; bzero(&xfi, sizeof(xfi)); const char *cp; cp = xo_parse_roles(xop, rolmod, rolmod, &xfi); if (cp == NULL) return -1; xfi.xfi_start = fmt; xfi.xfi_content = contents; xfi.xfi_format = fmt; xfi.xfi_encoding = efmt; xfi.xfi_clen = contents ? strlen(contents) : 0; xfi.xfi_flen = fmt ? strlen(fmt) : 0; xfi.xfi_elen = efmt ? strlen(efmt) : 0; /* If we have content, then we have a default format */ if (contents && fmt == NULL && xo_role_wants_default_format(xfi.xfi_ftype)) { xfi.xfi_format = xo_default_format; xfi.xfi_flen = 2; } va_copy(xop->xo_vap, vap); rc = xo_do_emit_fields(xop, &xfi, 1, fmt ?: contents ?: "field"); va_end(xop->xo_vap); return rc; } xo_ssize_t xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) { ssize_t rc; va_list vap; va_start(vap, efmt); rc = xo_emit_field_hv(xop, rolmod, contents, fmt, efmt, vap); va_end(vap); return rc; } xo_ssize_t xo_emit_field (const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) { ssize_t rc; va_list vap; va_start(vap, efmt); rc = xo_emit_field_hv(NULL, rolmod, contents, fmt, efmt, vap); va_end(vap); return rc; } xo_ssize_t xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap) { const ssize_t extra = 5; /* space, equals, quote, quote, and nul */ xop = xo_default(xop); ssize_t rc = 0; ssize_t nlen = strlen(name); xo_buffer_t *xbp = &xop->xo_attrs; ssize_t name_offset, value_offset; switch (xo_style(xop)) { case XO_STYLE_XML: if (!xo_buf_has_room(xbp, nlen + extra)) return -1; *xbp->xb_curp++ = ' '; memcpy(xbp->xb_curp, name, nlen); xbp->xb_curp += nlen; *xbp->xb_curp++ = '='; *xbp->xb_curp++ = '"'; rc = xo_vsnprintf(xop, xbp, fmt, vap); if (rc >= 0) { rc = xo_escape_xml(xbp, rc, 1); xbp->xb_curp += rc; } if (!xo_buf_has_room(xbp, 2)) return -1; *xbp->xb_curp++ = '"'; *xbp->xb_curp = '\0'; rc += nlen + extra; break; case XO_STYLE_ENCODER: name_offset = xo_buf_offset(xbp); xo_buf_append(xbp, name, nlen); xo_buf_append(xbp, "", 1); value_offset = xo_buf_offset(xbp); rc = xo_vsnprintf(xop, xbp, fmt, vap); if (rc >= 0) { xbp->xb_curp += rc; *xbp->xb_curp = '\0'; rc = xo_encoder_handle(xop, XO_OP_ATTRIBUTE, xo_buf_data(xbp, name_offset), xo_buf_data(xbp, value_offset), 0); } } return rc; } xo_ssize_t xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...) { ssize_t rc; va_list vap; va_start(vap, fmt); rc = xo_attr_hv(xop, name, fmt, vap); va_end(vap); return rc; } xo_ssize_t xo_attr (const char *name, const char *fmt, ...) 
{ ssize_t rc; va_list vap; va_start(vap, fmt); rc = xo_attr_hv(NULL, name, fmt, vap); va_end(vap); return rc; } static void xo_depth_change (xo_handle_t *xop, const char *name, int delta, int indent, xo_state_t state, xo_xsf_flags_t flags) { if (xo_style(xop) == XO_STYLE_HTML || xo_style(xop) == XO_STYLE_TEXT) indent = 0; if (XOF_ISSET(xop, XOF_DTRT)) flags |= XSF_DTRT; if (delta >= 0) { /* Push operation */ if (xo_depth_check(xop, xop->xo_depth + delta)) return; xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth + delta]; xsp->xs_flags = flags; xsp->xs_state = state; xo_stack_set_flags(xop); if (name == NULL) name = XO_FAILURE_NAME; xsp->xs_name = xo_strndup(name, -1); } else { /* Pop operation */ if (xop->xo_depth == 0) { if (!XOF_ISSET(xop, XOF_IGNORE_CLOSE)) xo_failure(xop, "close with empty stack: '%s'", name); return; } xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; if (XOF_ISSET(xop, XOF_WARN)) { const char *top = xsp->xs_name; if (top != NULL && name != NULL && !xo_streq(name, top)) { xo_failure(xop, "incorrect close: '%s' vs. '%s'", name, top); return; } if ((xsp->xs_flags & XSF_LIST) != (flags & XSF_LIST)) { xo_failure(xop, "list close on list conflict: '%s'", name); return; } if ((xsp->xs_flags & XSF_INSTANCE) != (flags & XSF_INSTANCE)) { xo_failure(xop, "list close on instance conflict: '%s'", name); return; } } if (xsp->xs_name) { xo_free(xsp->xs_name); xsp->xs_name = NULL; } if (xsp->xs_keys) { xo_free(xsp->xs_keys); xsp->xs_keys = NULL; } } xop->xo_depth += delta; /* Record new depth */ xop->xo_indent += indent; } void xo_set_depth (xo_handle_t *xop, int depth) { xop = xo_default(xop); if (xo_depth_check(xop, depth)) return; xop->xo_depth += depth; xop->xo_indent += depth; /* * Handling the "top wrapper" for JSON is a bit of a pain. Here * we need to detect that the depth has been changed to set the * "XOIF_TOP_EMITTED" flag correctly. */ if (xop->xo_style == XO_STYLE_JSON && !XOF_ISSET(xop, XOF_NO_TOP) && xop->xo_depth > 0) XOIF_SET(xop, XOIF_TOP_EMITTED); } static xo_xsf_flags_t xo_stack_flags (xo_xof_flags_t xflags) { if (xflags & XOF_DTRT) return XSF_DTRT; return 0; } static void xo_emit_top (xo_handle_t *xop, const char *ppn) { xo_printf(xop, "%*s{%s", xo_indent(xop), "", ppn); XOIF_SET(xop, XOIF_TOP_EMITTED); if (xop->xo_version) { xo_printf(xop, "%*s\"__version\": \"%s\", %s", xo_indent(xop), "", xop->xo_version, ppn); xo_free(xop->xo_version); xop->xo_version = NULL; } } static ssize_t xo_do_open_container (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_failure(xop, "NULL passed for container name"); name = XO_FAILURE_NAME; } const char *leader = xo_xml_leader(xop, name); flags |= xop->xo_flags; /* Pick up handle flags */ switch (xo_style(xop)) { case XO_STYLE_XML: rc = xo_printf(xop, "%*s<%s%s", xo_indent(xop), "", leader, name); if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) { rc += xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp; xo_data_append(xop, xop->xo_attrs.xb_bufp, xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp); xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp; } rc += xo_printf(xop, ">%s", ppn); break; case XO_STYLE_JSON: xo_stack_set_flags(xop); if (!XOF_ISSET(xop, XOF_NO_TOP) && !XOIF_ISSET(xop, XOIF_TOP_EMITTED)) xo_emit_top(xop, ppn); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ?
",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s\"%s\": {%s", pre_nl, xo_indent(xop), "", name, ppn); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_CONTAINER, name, NULL, flags); break; } xo_depth_change(xop, name, 1, 1, XSS_OPEN_CONTAINER, xo_stack_flags(flags)); return rc; } xo_ssize_t xo_open_container_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { return xo_transition(xop, flags, name, XSS_OPEN_CONTAINER); } xo_ssize_t xo_open_container_h (xo_handle_t *xop, const char *name) { return xo_open_container_hf(xop, 0, name); } xo_ssize_t xo_open_container (const char *name) { return xo_open_container_hf(NULL, 0, name); } xo_ssize_t xo_open_container_hd (xo_handle_t *xop, const char *name) { return xo_open_container_hf(xop, XOF_DTRT, name); } xo_ssize_t xo_open_container_d (const char *name) { return xo_open_container_hf(NULL, XOF_DTRT, name); } static int xo_do_close_container (xo_handle_t *xop, const char *name) { xop = xo_default(xop); ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } const char *leader = xo_xml_leader(xop, name); switch (xo_style(xop)) { case XO_STYLE_XML: xo_depth_change(xop, name, -1, -1, XSS_CLOSE_CONTAINER, 0); rc = xo_printf(xop, "%*s%s", xo_indent(xop), "", leader, name, ppn); break; case XO_STYLE_JSON: xo_stack_set_flags(xop); pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; ppn = ""; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_CONTAINER, 0); rc = xo_printf(xop, "%s%*s}%s", pre_nl, xo_indent(xop), "", ppn); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_HTML: case XO_STYLE_TEXT: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_CONTAINER, 0); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_CONTAINER, 0); rc = xo_encoder_handle(xop, XO_OP_CLOSE_CONTAINER, name, NULL, 0); break; } return rc; } xo_ssize_t xo_close_container_h (xo_handle_t *xop, const char *name) { return xo_transition(xop, 0, name, XSS_CLOSE_CONTAINER); } xo_ssize_t xo_close_container (const char *name) { return xo_close_container_h(NULL, name); } xo_ssize_t xo_close_container_hd (xo_handle_t *xop) { return xo_close_container_h(xop, NULL); } xo_ssize_t xo_close_container_d (void) { return xo_close_container_h(NULL, NULL); } static int xo_do_open_list (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { ssize_t rc = 0; int indent = 0; xop = xo_default(xop); const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; switch (xo_style(xop)) { case XO_STYLE_JSON: indent = 1; if (!XOF_ISSET(xop, XOF_NO_TOP) && !XOIF_ISSET(xop, XOIF_TOP_EMITTED)) xo_emit_top(xop, ppn); if (name == NULL) { xo_failure(xop, "NULL passed for list name"); name = XO_FAILURE_NAME; } xo_stack_set_flags(xop); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? 
",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s\"%s\": [%s", pre_nl, xo_indent(xop), "", name, ppn); break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_LIST, name, NULL, flags); break; } xo_depth_change(xop, name, 1, indent, XSS_OPEN_LIST, XSF_LIST | xo_stack_flags(flags)); return rc; } xo_ssize_t xo_open_list_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { return xo_transition(xop, flags, name, XSS_OPEN_LIST); } xo_ssize_t xo_open_list_h (xo_handle_t *xop, const char *name) { return xo_open_list_hf(xop, 0, name); } xo_ssize_t xo_open_list (const char *name) { return xo_open_list_hf(NULL, 0, name); } xo_ssize_t xo_open_list_hd (xo_handle_t *xop, const char *name) { return xo_open_list_hf(xop, XOF_DTRT, name); } xo_ssize_t xo_open_list_d (const char *name) { return xo_open_list_hf(NULL, XOF_DTRT, name); } static int xo_do_close_list (xo_handle_t *xop, const char *name) { ssize_t rc = 0; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } switch (xo_style(xop)) { case XO_STYLE_JSON: if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_LIST, XSF_LIST); rc = xo_printf(xop, "%s%*s]", pre_nl, xo_indent(xop), ""); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_ENCODER: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LIST, XSF_LIST); rc = xo_encoder_handle(xop, XO_OP_CLOSE_LIST, name, NULL, 0); break; default: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LIST, XSF_LIST); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; } return rc; } xo_ssize_t xo_close_list_h (xo_handle_t *xop, const char *name) { return xo_transition(xop, 0, name, XSS_CLOSE_LIST); } xo_ssize_t xo_close_list (const char *name) { return xo_close_list_h(NULL, name); } xo_ssize_t xo_close_list_hd (xo_handle_t *xop) { return xo_close_list_h(xop, NULL); } xo_ssize_t xo_close_list_d (void) { return xo_close_list_h(NULL, NULL); } static int xo_do_open_leaf_list (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { ssize_t rc = 0; int indent = 0; xop = xo_default(xop); const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; switch (xo_style(xop)) { case XO_STYLE_JSON: indent = 1; if (!XOF_ISSET(xop, XOF_NO_TOP)) { if (!XOIF_ISSET(xop, XOIF_TOP_EMITTED)) { xo_printf(xop, "%*s{%s", xo_indent(xop), "", ppn); XOIF_SET(xop, XOIF_TOP_EMITTED); } } if (name == NULL) { xo_failure(xop, "NULL passed for list name"); name = XO_FAILURE_NAME; } xo_stack_set_flags(xop); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? 
",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s\"%s\": [%s", pre_nl, xo_indent(xop), "", name, ppn); break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_LEAF_LIST, name, NULL, flags); break; } xo_depth_change(xop, name, 1, indent, XSS_OPEN_LEAF_LIST, XSF_LIST | xo_stack_flags(flags)); return rc; } static int xo_do_close_leaf_list (xo_handle_t *xop, const char *name) { ssize_t rc = 0; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } switch (xo_style(xop)) { case XO_STYLE_JSON: if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_LEAF_LIST, XSF_LIST); rc = xo_printf(xop, "%s%*s]", pre_nl, xo_indent(xop), ""); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_CLOSE_LEAF_LIST, name, NULL, 0); /* FALLTHRU */ default: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LEAF_LIST, XSF_LIST); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; } return rc; } static int xo_do_open_instance (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { xop = xo_default(xop); ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_failure(xop, "NULL passed for instance name"); name = XO_FAILURE_NAME; } const char *leader = xo_xml_leader(xop, name); flags |= xop->xo_flags; switch (xo_style(xop)) { case XO_STYLE_XML: rc = xo_printf(xop, "%*s<%s%s", xo_indent(xop), "", leader, name); if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) { rc += xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp; xo_data_append(xop, xop->xo_attrs.xb_bufp, xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp); xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp; } rc += xo_printf(xop, ">%s", ppn); break; case XO_STYLE_JSON: xo_stack_set_flags(xop); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? ",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s{%s", pre_nl, xo_indent(xop), "", ppn); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_INSTANCE, name, NULL, flags); break; } xo_depth_change(xop, name, 1, 1, XSS_OPEN_INSTANCE, xo_stack_flags(flags)); return rc; } xo_ssize_t xo_open_instance_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { return xo_transition(xop, flags, name, XSS_OPEN_INSTANCE); } xo_ssize_t xo_open_instance_h (xo_handle_t *xop, const char *name) { return xo_open_instance_hf(xop, 0, name); } xo_ssize_t xo_open_instance (const char *name) { return xo_open_instance_hf(NULL, 0, name); } xo_ssize_t xo_open_instance_hd (xo_handle_t *xop, const char *name) { return xo_open_instance_hf(xop, XOF_DTRT, name); } xo_ssize_t xo_open_instance_d (const char *name) { return xo_open_instance_hf(NULL, XOF_DTRT, name); } static int xo_do_close_instance (xo_handle_t *xop, const char *name) { xop = xo_default(xop); ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? 
"\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } const char *leader = xo_xml_leader(xop, name); switch (xo_style(xop)) { case XO_STYLE_XML: xo_depth_change(xop, name, -1, -1, XSS_CLOSE_INSTANCE, 0); rc = xo_printf(xop, "%*s%s", xo_indent(xop), "", leader, name, ppn); break; case XO_STYLE_JSON: pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_INSTANCE, 0); rc = xo_printf(xop, "%s%*s}", pre_nl, xo_indent(xop), ""); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_HTML: case XO_STYLE_TEXT: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_INSTANCE, 0); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_INSTANCE, 0); rc = xo_encoder_handle(xop, XO_OP_CLOSE_INSTANCE, name, NULL, 0); break; } return rc; } xo_ssize_t xo_close_instance_h (xo_handle_t *xop, const char *name) { return xo_transition(xop, 0, name, XSS_CLOSE_INSTANCE); } xo_ssize_t xo_close_instance (const char *name) { return xo_close_instance_h(NULL, name); } xo_ssize_t xo_close_instance_hd (xo_handle_t *xop) { return xo_close_instance_h(xop, NULL); } xo_ssize_t xo_close_instance_d (void) { return xo_close_instance_h(NULL, NULL); } static int xo_do_close_all (xo_handle_t *xop, xo_stack_t *limit) { xo_stack_t *xsp; ssize_t rc = 0; xo_xsf_flags_t flags; for (xsp = &xop->xo_stack[xop->xo_depth]; xsp >= limit; xsp--) { switch (xsp->xs_state) { case XSS_INIT: /* Nothing */ rc = 0; break; case XSS_OPEN_CONTAINER: rc = xo_do_close_container(xop, NULL); break; case XSS_OPEN_LIST: rc = xo_do_close_list(xop, NULL); break; case XSS_OPEN_INSTANCE: rc = xo_do_close_instance(xop, NULL); break; case XSS_OPEN_LEAF_LIST: rc = xo_do_close_leaf_list(xop, NULL); break; case XSS_MARKER: flags = xsp->xs_flags & XSF_MARKER_FLAGS; xo_depth_change(xop, xsp->xs_name, -1, 0, XSS_MARKER, 0); xop->xo_stack[xop->xo_depth].xs_flags |= flags; rc = 0; break; } if (rc < 0) xo_failure(xop, "close %d failed: %d", xsp->xs_state, rc); } return 0; } /* * This function is responsible for clearing out whatever is needed * to get to the desired state, if possible. */ static int xo_do_close (xo_handle_t *xop, const char *name, xo_state_t new_state) { xo_stack_t *xsp, *limit = NULL; ssize_t rc; xo_state_t need_state = new_state; if (new_state == XSS_CLOSE_CONTAINER) need_state = XSS_OPEN_CONTAINER; else if (new_state == XSS_CLOSE_LIST) need_state = XSS_OPEN_LIST; else if (new_state == XSS_CLOSE_INSTANCE) need_state = XSS_OPEN_INSTANCE; else if (new_state == XSS_CLOSE_LEAF_LIST) need_state = XSS_OPEN_LEAF_LIST; else if (new_state == XSS_MARKER) need_state = XSS_MARKER; else return 0; /* Unknown or useless new states are ignored */ for (xsp = &xop->xo_stack[xop->xo_depth]; xsp > xop->xo_stack; xsp--) { /* * Marker's normally stop us from going any further, unless * we are popping a marker (new_state == XSS_MARKER). 
*/ if (xsp->xs_state == XSS_MARKER && need_state != XSS_MARKER) { if (name) { xo_failure(xop, "close (xo_%s) fails at marker '%s'; " "not found '%s'", xo_state_name(new_state), xsp->xs_name, name); return 0; } else { limit = xsp; xo_failure(xop, "close stops at marker '%s'", xsp->xs_name); } break; } if (xsp->xs_state != need_state) continue; if (name && xsp->xs_name && !xo_streq(name, xsp->xs_name)) continue; limit = xsp; break; } if (limit == NULL) { xo_failure(xop, "xo_%s can't find match for '%s'", xo_state_name(new_state), name); return 0; } rc = xo_do_close_all(xop, limit); return rc; } /* * We are in a given state and need to transition to the new state. */ static ssize_t xo_transition (xo_handle_t *xop, xo_xof_flags_t flags, const char *name, xo_state_t new_state) { xo_stack_t *xsp; ssize_t rc = 0; int old_state, on_marker; xop = xo_default(xop); xsp = &xop->xo_stack[xop->xo_depth]; old_state = xsp->xs_state; on_marker = (old_state == XSS_MARKER); /* If there's a marker on top of the stack, we need to find a real state */ while (old_state == XSS_MARKER) { if (xsp == xop->xo_stack) break; xsp -= 1; old_state = xsp->xs_state; } /* * At this point, the list of possible states are: * XSS_INIT, XSS_OPEN_CONTAINER, XSS_OPEN_LIST, * XSS_OPEN_INSTANCE, XSS_OPEN_LEAF_LIST, XSS_DISCARDING */ switch (XSS_TRANSITION(old_state, new_state)) { open_container: case XSS_TRANSITION(XSS_INIT, XSS_OPEN_CONTAINER): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_CONTAINER): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_CONTAINER): rc = xo_do_open_container(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_CONTAINER): case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_CONTAINER): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) goto open_container; break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_CONTAINER): /* This is an exception for "xo --close" */ rc = xo_do_close_container(xop, name); break; /*close_container:*/ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_CONTAINER): case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_CONTAINER): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_CONTAINER): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_CONTAINER): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) rc = xo_do_close(xop, name, new_state); break; open_list: case XSS_TRANSITION(XSS_INIT, XSS_OPEN_LIST): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_LIST): rc = xo_do_open_list(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_list(xop, NULL); if (rc >= 0) goto open_list; break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) goto open_list; break; /*close_list:*/ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_LIST): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_LIST): case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_LIST): rc = xo_do_close(xop, name, new_state); break; open_instance: case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_INSTANCE): rc = xo_do_open_instance(xop, 
flags, name); break; case XSS_TRANSITION(XSS_INIT, XSS_OPEN_INSTANCE): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_INSTANCE): rc = xo_do_open_list(xop, flags, name); if (rc >= 0) goto open_instance; break; case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_INSTANCE): if (on_marker) { rc = xo_do_open_list(xop, flags, name); } else { rc = xo_do_close_instance(xop, NULL); } if (rc >= 0) goto open_instance; break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) goto open_instance; break; /*close_instance:*/ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close_instance(xop, name); break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_INSTANCE): /* This one makes no sense; ignore it */ xo_failure(xop, "xo_close_instance ignored when called from " "initial state ('%s')", name ?: "(unknown)"); break; case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_INSTANCE): case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) rc = xo_do_close(xop, name, new_state); break; open_leaf_list: case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_LEAF_LIST): case XSS_TRANSITION(XSS_INIT, XSS_OPEN_LEAF_LIST): rc = xo_do_open_leaf_list(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_LEAF_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_list(xop, NULL); if (rc >= 0) goto open_leaf_list; break; /*close_leaf_list:*/ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_LEAF_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, name); break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_LEAF_LIST): /* Makes no sense; ignore */ xo_failure(xop, "xo_close_leaf_list ignored when called from " "initial state ('%s')", name ?: "(unknown)"); break; case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_LEAF_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; /*emit:*/ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_EMIT): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_EMIT): break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_EMIT): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, NULL, XSS_CLOSE_LIST); break; case XSS_TRANSITION(XSS_INIT, XSS_EMIT): break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_EMIT): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); break; /*emit_leaf_list:*/ case XSS_TRANSITION(XSS_INIT, XSS_EMIT_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_EMIT_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_EMIT_LEAF_LIST): rc = xo_do_open_leaf_list(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_EMIT_LEAF_LIST): break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_EMIT_LEAF_LIST): /* * We need to be backward compatible with the pre-xo_open_leaf_list * API, where both lists and leaf-lists were opened as lists. So * if we find an open list that hasn't had anything written to it, * we'll accept it. 
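* * For example, older code may call xo_open_list("names") and then emit "{l:names}" leaf-list fields directly; the already-open, still-empty list is accepted as if xo_open_leaf_list had been used.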
*/ break; default: xo_failure(xop, "unknown transition: (%u -> %u)", xsp->xs_state, new_state); } /* Handle the flush flag */ if (rc >= 0 && XOF_ISSET(xop, XOF_FLUSH)) if (xo_flush_h(xop) < 0) rc = -1; /* We have now officially made output */ XOIF_SET(xop, XOIF_MADE_OUTPUT); return rc; marker_prevents_close: xo_failure(xop, "marker '%s' prevents transition from %s to %s", xop->xo_stack[xop->xo_depth].xs_name, xo_state_name(old_state), xo_state_name(new_state)); return -1; } xo_ssize_t xo_open_marker_h (xo_handle_t *xop, const char *name) { xop = xo_default(xop); xo_depth_change(xop, name, 1, 0, XSS_MARKER, xop->xo_stack[xop->xo_depth].xs_flags & XSF_MARKER_FLAGS); return 0; } xo_ssize_t xo_open_marker (const char *name) { return xo_open_marker_h(NULL, name); } xo_ssize_t xo_close_marker_h (xo_handle_t *xop, const char *name) { xop = xo_default(xop); return xo_do_close(xop, name, XSS_MARKER); } xo_ssize_t xo_close_marker (const char *name) { return xo_close_marker_h(NULL, name); } /* * Record custom output functions into the xo handle, allowing * integration with a variety of output frameworks. */ void xo_set_writer (xo_handle_t *xop, void *opaque, xo_write_func_t write_func, xo_close_func_t close_func, xo_flush_func_t flush_func) { xop = xo_default(xop); xop->xo_opaque = opaque; xop->xo_write = write_func; xop->xo_close = close_func; xop->xo_flush = flush_func; } void xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func) { xo_realloc = realloc_func; xo_free = free_func; } xo_ssize_t xo_flush_h (xo_handle_t *xop) { ssize_t rc; xop = xo_default(xop); switch (xo_style(xop)) { case XO_STYLE_ENCODER: xo_encoder_handle(xop, XO_OP_FLUSH, NULL, NULL, 0); } rc = xo_write(xop); if (rc >= 0 && xop->xo_flush) if (xop->xo_flush(xop->xo_opaque) < 0) return -1; return rc; } xo_ssize_t xo_flush (void) { return xo_flush_h(NULL); } xo_ssize_t xo_finish_h (xo_handle_t *xop) { const char *open_if_empty = ""; xop = xo_default(xop); if (!XOF_ISSET(xop, XOF_NO_CLOSE)) xo_do_close_all(xop, xop->xo_stack); switch (xo_style(xop)) { case XO_STYLE_JSON: if (!XOF_ISSET(xop, XOF_NO_TOP)) { const char *pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; if (XOIF_ISSET(xop, XOIF_TOP_EMITTED)) XOIF_CLEAR(xop, XOIF_TOP_EMITTED); /* Turn off before output */ else if (!XOIF_ISSET(xop, XOIF_MADE_OUTPUT)) { open_if_empty = "{ "; pre_nl = ""; } xo_printf(xop, "%s%*s%s}\n", pre_nl, xo_indent(xop), "", open_if_empty); } break; case XO_STYLE_ENCODER: xo_encoder_handle(xop, XO_OP_FINISH, NULL, NULL, 0); break; } return xo_flush_h(xop); } xo_ssize_t xo_finish (void) { return xo_finish_h(NULL); } /* * xo_finish_atexit is suitable for atexit() calls, to force cleanup * and finalize output. */ void xo_finish_atexit (void) { (void) xo_finish_h(NULL); } /* * Generate an error message, such as would be displayed on stderr */ void -xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap) +xo_errorn_hv (xo_handle_t *xop, int need_newline, const char *fmt, va_list vap) { xop = xo_default(xop); /* * If the format string doesn't end with a newline, we pop * one on ourselves.
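 *
 * For example, xo_errorn("open failed: %s", strerror(errno)) gains a
 * trailing newline here, while xo_error("partial ") is passed through
 * untouched, since need_newline is zero for the xo_error family.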
*/ - ssize_t len = strlen(fmt); - if (len > 0 && fmt[len - 1] != '\n') { - char *newfmt = alloca(len + 2); - memcpy(newfmt, fmt, len); - newfmt[len] = '\n'; - newfmt[len + 1] = '\0'; - fmt = newfmt; + if (need_newline) { + ssize_t len = strlen(fmt); + if (len > 0 && fmt[len - 1] != '\n') { + char *newfmt = alloca(len + 2); + memcpy(newfmt, fmt, len); + newfmt[len] = '\n'; + newfmt[len + 1] = '\0'; + fmt = newfmt; + } } switch (xo_style(xop)) { case XO_STYLE_TEXT: vfprintf(stderr, fmt, vap); break; case XO_STYLE_HTML: va_copy(xop->xo_vap, vap); xo_buf_append_div(xop, "error", 0, NULL, 0, NULL, 0, fmt, strlen(fmt), NULL, 0); if (XOIF_ISSET(xop, XOIF_DIV_OPEN)) xo_line_close(xop); xo_write(xop); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); break; case XO_STYLE_XML: case XO_STYLE_JSON: va_copy(xop->xo_vap, vap); xo_open_container_h(xop, "error"); xo_format_value(xop, "message", 7, NULL, 0, fmt, strlen(fmt), NULL, 0, 0); xo_close_container_h(xop, "error"); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); break; case XO_STYLE_SDPARAMS: case XO_STYLE_ENCODER: break; } } void xo_error_h (xo_handle_t *xop, const char *fmt, ...) { va_list vap; va_start(vap, fmt); - xo_error_hv(xop, fmt, vap); + xo_errorn_hv(xop, 0, fmt, vap); va_end(vap); } /* * Generate an error message, such as would be displayed on stderr */ void xo_error (const char *fmt, ...) { va_list vap; va_start(vap, fmt); - xo_error_hv(NULL, fmt, vap); + xo_errorn_hv(NULL, 0, fmt, vap); va_end(vap); } +void +xo_errorn_h (xo_handle_t *xop, const char *fmt, ...) +{ + va_list vap; + + va_start(vap, fmt); + xo_errorn_hv(xop, 1, fmt, vap); + va_end(vap); +} + /* + * Generate an error message, such as would be displayed on stderr + */ +void +xo_errorn (const char *fmt, ...) +{ + va_list vap; + + va_start(vap, fmt); + xo_errorn_hv(NULL, 1, fmt, vap); + va_end(vap); +} + +/* * Parse any libxo-specific options from the command line, removing them * so the main() argument parsing won't see them. We return the new value * for argc or -1 for error. If an error occurred, the program should * exit. A suitable error message has already been displayed. */ int xo_parse_args (int argc, char **argv) { static char libxo_opt[] = "--libxo"; char *cp; int i, save; - /* Save our program name for xo_err and friends */ - xo_program = argv[0]; - cp = strrchr(xo_program, '/'); - if (cp) - xo_program = ++cp; - else - cp = argv[0]; /* Reset to front of string */ + /* + * If xo_set_program has already been called, we honor that value + */ + if (xo_program == NULL) { + /* Save our program name for xo_err and friends */ + xo_program = argv[0]; + cp = strrchr(xo_program, '/'); + if (cp) + xo_program = ++cp; + else + cp = argv[0]; /* Reset to front of string */ - /* GNU tools add an annoying ".test" as the program extension; remove it */ - size_t len = strlen(xo_program); - static const char gnu_ext[] = ".test"; - if (len >= sizeof(gnu_ext)) { - cp += len + 1 - sizeof(gnu_ext); - if (xo_streq(cp, gnu_ext)) - *cp = '\0'; + /* + * GNU libtool adds an annoying ".test" as the program + * extension; we remove it. libtool also adds a "lt-" prefix + * that we cannot remove.
+ */ + size_t len = strlen(xo_program); + static const char gnu_ext[] = ".test"; + if (len >= sizeof(gnu_ext)) { + cp += len + 1 - sizeof(gnu_ext); + if (xo_streq(cp, gnu_ext)) + *cp = '\0'; + } } xo_handle_t *xop = xo_default(NULL); for (save = i = 1; i < argc; i++) { if (argv[i] == NULL || strncmp(argv[i], libxo_opt, sizeof(libxo_opt) - 1) != 0) { if (save != i) argv[save] = argv[i]; save += 1; continue; } cp = argv[i] + sizeof(libxo_opt) - 1; if (*cp == '\0') { cp = argv[++i]; if (cp == NULL) { xo_warnx("missing libxo option"); return -1; } if (xo_set_options(xop, cp) < 0) return -1; } else if (*cp == ':') { if (xo_set_options(xop, cp) < 0) return -1; } else if (*cp == '=') { if (xo_set_options(xop, ++cp) < 0) return -1; } else if (*cp == '-') { cp += 1; if (xo_streq(cp, "check")) { exit(XO_HAS_LIBXO); } else { xo_warnx("unknown libxo option: '%s'", argv[i]); return -1; } } else { xo_warnx("unknown libxo option: '%s'", argv[i]); return -1; } } /* * We only want to do color output on terminals, but we only want * to do this if the user has asked for color. */ if (XOF_ISSET(xop, XOF_COLOR_ALLOWED) && isatty(1)) XOF_SET(xop, XOF_COLOR); argv[save] = NULL; return save; } /* * Debugging function that dumps the current stack of open libxo constructs, * suitable for calling from the debugger. */ void xo_dump_stack (xo_handle_t *xop) { int i; xo_stack_t *xsp; xop = xo_default(xop); fprintf(stderr, "Stack dump:\n"); xsp = xop->xo_stack; for (i = 1, xsp++; i <= xop->xo_depth; i++, xsp++) { fprintf(stderr, " [%d] %s '%s' [%x]\n", i, xo_state_name(xsp->xs_state), xsp->xs_name ?: "--", xsp->xs_flags); } } /* * Record the program name used for error messages */ void xo_set_program (const char *name) { xo_program = name; } void xo_set_version_h (xo_handle_t *xop, const char *version) { xop = xo_default(xop); if (version == NULL || strchr(version, '"') != NULL) return; if (!xo_style_is_encoding(xop)) return; switch (xo_style(xop)) { case XO_STYLE_XML: /* For XML, we record this as an attribute for the first tag */ xo_attr_h(xop, "version", "%s", version); break; case XO_STYLE_JSON: /* * For JSON, we record the version string in our handle, and emit * it in xo_emit_top. */ xop->xo_version = xo_strndup(version, -1); break; case XO_STYLE_ENCODER: xo_encoder_handle(xop, XO_OP_VERSION, NULL, version, 0); break; } } /* * Set the version number for the API content being carried through * the xo handle. */ void xo_set_version (const char *version) { xo_set_version_h(NULL, version); } /* * Generate a warning. Normally, this is a text message written to * standard error. If the XOF_WARN_XML flag is set, then we generate * XMLified content on standard output. */ void xo_emit_warn_hcv (xo_handle_t *xop, int as_warning, int code, const char *fmt, va_list vap) { xop = xo_default(xop); if (fmt == NULL) return; xo_open_marker_h(xop, "xo_emit_warn_hcv"); xo_open_container_h(xop, as_warning ? 
"__warning" : "__error"); if (xo_program) xo_emit("{wc:program}", xo_program); if (xo_style(xop) == XO_STYLE_XML || xo_style(xop) == XO_STYLE_JSON) { va_list ap; xo_handle_t temp; bzero(&temp, sizeof(temp)); temp.xo_style = XO_STYLE_TEXT; xo_buf_init(&temp.xo_data); xo_depth_check(&temp, XO_DEPTH); va_copy(ap, vap); (void) xo_emit_hv(&temp, fmt, ap); va_end(ap); xo_buffer_t *src = &temp.xo_data; xo_format_value(xop, "message", 7, src->xb_bufp, src->xb_curp - src->xb_bufp, NULL, 0, NULL, 0, 0); xo_free(temp.xo_stack); xo_buf_cleanup(src); } (void) xo_emit_hv(xop, fmt, vap); ssize_t len = strlen(fmt); if (len > 0 && fmt[len - 1] != '\n') { if (code > 0) { const char *msg = strerror(code); if (msg) xo_emit_h(xop, ": {G:strerror}{g:error/%s}", msg); } xo_emit("\n"); } xo_close_marker_h(xop, "xo_emit_warn_hcv"); xo_flush_h(xop); } void xo_emit_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(xop, 1, code, fmt, vap); va_end(vap); } void xo_emit_warn_c (int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } void xo_emit_warn (const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } void xo_emit_warnx (const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, -1, fmt, vap); va_end(vap); } void xo_emit_err_v (int eval, int code, const char *fmt, va_list vap) { xo_emit_warn_hcv(NULL, 0, code, fmt, vap); xo_finish(); exit(eval); } void xo_emit_err (int eval, const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_err_v(eval, code, fmt, vap); /*NOTREACHED*/ } void xo_emit_errx (int eval, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_err_v(eval, -1, fmt, vap); /* This will exit */ /*NOTREACHED*/ } void xo_emit_errc (int eval, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_err_v(eval, code, fmt, vap); /* This will exit */ /*NOTREACHED*/ } /* * Get the opaque private pointer for an xo handle */ void * xo_get_private (xo_handle_t *xop) { xop = xo_default(xop); return xop->xo_private; } /* * Set the opaque private pointer for an xo handle. */ void xo_set_private (xo_handle_t *xop, void *opaque) { xop = xo_default(xop); xop->xo_private = opaque; } /* * Get the encoder function */ xo_encoder_func_t xo_get_encoder (xo_handle_t *xop) { xop = xo_default(xop); return xop->xo_encoder; } /* * Record an encoder callback function in an xo handle. */ void xo_set_encoder (xo_handle_t *xop, xo_encoder_func_t encoder) { xop = xo_default(xop); xop->xo_style = XO_STYLE_ENCODER; xop->xo_encoder = encoder; } /* * The xo(1) utility needs to be able to open and close lists and * instances, but since it's called without "state", we cannot * rely on the state transitions (in xo_transition) to DTRT, so * we have a mechanism for external parties to "force" transitions * that would otherwise be impossible. This is not a general * mechanism, and is really tailored only for xo(1). 
*/ void xo_explicit_transition (xo_handle_t *xop, xo_state_t new_state, const char *name, xo_xof_flags_t flags) { xo_xsf_flags_t xsf_flags; xop = xo_default(xop); switch (new_state) { case XSS_OPEN_LIST: xo_do_open_list(xop, flags, name); break; case XSS_OPEN_INSTANCE: xo_do_open_instance(xop, flags, name); break; case XSS_CLOSE_INSTANCE: xo_depth_change(xop, name, 1, 1, XSS_OPEN_INSTANCE, xo_stack_flags(flags)); xo_stack_set_flags(xop); xo_do_close_instance(xop, name); break; case XSS_CLOSE_LIST: xsf_flags = XOF_ISSET(xop, XOF_NOT_FIRST) ? XSF_NOT_FIRST : 0; xo_depth_change(xop, name, 1, 1, XSS_OPEN_LIST, XSF_LIST | xsf_flags | xo_stack_flags(flags)); xo_do_close_list(xop, name); break; } } Index: projects/clang1000-import/contrib/libxo/libxo/xo.h =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/xo.h (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/xo.h (revision 357179) @@ -1,693 +1,702 @@ /* * Copyright (c) 2014-2018, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, July 2014 */ /** * libxo provides a means of generating text, XML, JSON, and HTML output * using a single set of function calls, maximizing the value of output * while minimizing the cost/impact on the code. * * Full documentation is available in ./doc/libxo.txt or online at: * http://juniper.github.io/libxo/libxo-manual.html */ #ifndef INCLUDE_XO_H #define INCLUDE_XO_H #include #include #include #include #include #include #ifdef __dead2 #define NORETURN __dead2 #else #define NORETURN #endif /* __dead2 */ /* * Normally we'd use the HAVE_PRINTFLIKE define triggered by the * --enable-printflike option to configure, but we don't install * our internal "xoconfig.h", and I'd rather not. Taking the * coward's path, we'll turn it on inside a #if that allows * others to turn it off where needed. Not ideal, but functional. 
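 *
 * In practice, a build that cannot tolerate these attributes can opt
 * out by defining NO_PRINTFLIKE (for example, -DNO_PRINTFLIKE in
 * CFLAGS), which turns PRINTFLIKE into a no-op below.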
*/ #if !defined(NO_PRINTFLIKE) #if defined(__linux) && !defined(__printflike) #define __printflike(_x, _y) __attribute__((__format__ (__printf__, _x, _y))) #endif #define PRINTFLIKE(_x, _y) __printflike(_x, _y) #else #define PRINTFLIKE(_x, _y) #endif /* NO_PRINTFLIKE */ /** Formatting types */ typedef unsigned short xo_style_t; #define XO_STYLE_TEXT 0 /** Generate text output */ #define XO_STYLE_XML 1 /** Generate XML output */ #define XO_STYLE_JSON 2 /** Generate JSON output */ #define XO_STYLE_HTML 3 /** Generate HTML output */ #define XO_STYLE_SDPARAMS 4 /* Generate syslog structured data params */ #define XO_STYLE_ENCODER 5 /* Generate calls to external encoder */ /** Flags for libxo */ typedef unsigned long long xo_xof_flags_t; #define XOF_BIT(_n) ((xo_xof_flags_t) 1 << (_n)) #define XOF_CLOSE_FP XOF_BIT(0) /** Close file pointer on xo_close() */ #define XOF_PRETTY XOF_BIT(1) /** Make 'pretty printed' output */ #define XOF_LOG_SYSLOG XOF_BIT(2) /** Log (on stderr) our syslog content */ #define XOF_RESV3 XOF_BIT(3) /* Unused */ #define XOF_WARN XOF_BIT(4) /** Generate warnings for broken calls */ #define XOF_XPATH XOF_BIT(5) /** Emit XPath attributes in HTML */ #define XOF_INFO XOF_BIT(6) /** Emit additional info fields (HTML) */ #define XOF_WARN_XML XOF_BIT(7) /** Emit warnings in XML (on stdout) */ #define XOF_NO_ENV XOF_BIT(8) /** Don't look at LIBXO_OPTIONS env var */ #define XOF_NO_VA_ARG XOF_BIT(9) /** Don't advance va_list w/ va_arg() */ #define XOF_DTRT XOF_BIT(10) /** Enable "do the right thing" mode */ #define XOF_KEYS XOF_BIT(11) /** Flag 'key' fields for xml and json */ #define XOF_IGNORE_CLOSE XOF_BIT(12) /** Ignore errors on close tags */ #define XOF_NOT_FIRST XOF_BIT(13) /* Not the first item (JSON) */ #define XOF_NO_LOCALE XOF_BIT(14) /** Don't bother with locale */ #define XOF_RESV15 XOF_BIT(15) /* Unused */ #define XOF_NO_TOP XOF_BIT(16) /** Don't emit the top braces in JSON */ #define XOF_RESV17 XOF_BIT(17) /* Unused */ #define XOF_UNITS XOF_BIT(18) /** Encode units in XML */ #define XOF_RESV19 XOF_BIT(19) /* Unused */ #define XOF_UNDERSCORES XOF_BIT(20) /** Replace dashes with underscores (JSON)*/ #define XOF_COLUMNS XOF_BIT(21) /** xo_emit should return a column count */ #define XOF_FLUSH XOF_BIT(22) /** Flush after each xo_emit call */ #define XOF_FLUSH_LINE XOF_BIT(23) /** Flush after each newline */ #define XOF_NO_CLOSE XOF_BIT(24) /** xo_finish won't close open elements */ #define XOF_COLOR_ALLOWED XOF_BIT(25) /** Allow color/effects to be enabled */ #define XOF_COLOR XOF_BIT(26) /** Enable color and effects */ #define XOF_NO_HUMANIZE XOF_BIT(27) /** Block the {h:} modifier */ #define XOF_LOG_GETTEXT XOF_BIT(28) /** Log (stderr) gettext lookup strings */ #define XOF_UTF8 XOF_BIT(29) /** Force text output to be UTF8 */ #define XOF_RETAIN_ALL XOF_BIT(30) /** Force use of XOEF_RETAIN */ #define XOF_RETAIN_NONE XOF_BIT(31) /** Prevent use of XOEF_RETAIN */ #define XOF_COLOR_MAP XOF_BIT(32) /** Color map has been initialized */ #define XOF_CONTINUATION XOF_BIT(33) /** Continuation of previous line */ typedef unsigned xo_emit_flags_t; /* Flags to xo_emit() and friends */ #define XOEF_RETAIN (1<<0) /* Retain parsed formatting information */ /* * The xo_info_t structure provides a mapping between names and * additional data emitted via HTML. 
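 *
 * A minimal sketch of its use (the field names are only illustrative):
 *
 *     xo_info_t info[] = {
 *         { "in-stock", "number", "Number of items in stock" },
 *         { XO_INFO_NULL },
 *     };
 *     xo_set_info(NULL, info, -1);   .. -1 means NULL-terminated list
 *
 * With XOF_INFO set, HTML output then carries these hints along with
 * the data.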
*/ typedef struct xo_info_s { const char *xi_name; /* Name of the element */ const char *xi_type; /* Type of field */ const char *xi_help; /* Description of field */ } xo_info_t; #define XO_INFO_NULL NULL, NULL, NULL /* Use '{ XO_INFO_NULL }' to end lists */ struct xo_handle_s; /* Opaque structure forward */ typedef struct xo_handle_s xo_handle_t; /* Handle for XO output */ /* * Early versions of the API used "int" instead of "size_t" for buffer * sizes. We want to fix this but allow for backwards compatibility * where needed. */ #ifdef XO_USE_INT_RETURN_CODES typedef int xo_ssize_t; /* Buffer size */ #else /* XO_USE_INT_RETURN_CODES */ typedef ssize_t xo_ssize_t; /* Buffer size */ #endif /* XO_USE_INT_RETURN_CODES */ typedef xo_ssize_t (*xo_write_func_t)(void *, const char *); typedef void (*xo_close_func_t)(void *); typedef int (*xo_flush_func_t)(void *); typedef void *(*xo_realloc_func_t)(void *, size_t); typedef void (*xo_free_func_t)(void *); /* * The formatter function mirrors "vsnprintf", with an additional argument * of the xo handle. The caller should return the number of bytes _needed_ * to fit the data, even if this exceeds 'len'. */ typedef xo_ssize_t (*xo_formatter_t)(xo_handle_t *, char *, xo_ssize_t, const char *, va_list); typedef void (*xo_checkpointer_t)(xo_handle_t *, va_list, int); xo_handle_t * xo_create (xo_style_t style, xo_xof_flags_t flags); xo_handle_t * xo_create_to_file (FILE *fp, xo_style_t style, xo_xof_flags_t flags); void xo_destroy (xo_handle_t *xop); void xo_set_writer (xo_handle_t *xop, void *opaque, xo_write_func_t write_func, xo_close_func_t close_func, xo_flush_func_t flush_func); void xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func); void xo_set_style (xo_handle_t *xop, xo_style_t style); xo_style_t xo_get_style (xo_handle_t *xop); int xo_set_style_name (xo_handle_t *xop, const char *style); int xo_set_options (xo_handle_t *xop, const char *input); xo_xof_flags_t xo_get_flags (xo_handle_t *xop); void xo_set_flags (xo_handle_t *xop, xo_xof_flags_t flags); void xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags); int xo_set_file_h (xo_handle_t *xop, FILE *fp); int xo_set_file (FILE *fp); void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count); void xo_set_formatter (xo_handle_t *xop, xo_formatter_t func, xo_checkpointer_t); void xo_set_depth (xo_handle_t *xop, int depth); xo_ssize_t xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap); xo_ssize_t xo_emit_h (xo_handle_t *xop, const char *fmt, ...); xo_ssize_t xo_emit (const char *fmt, ...); xo_ssize_t xo_emit_hvf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, va_list vap); xo_ssize_t xo_emit_hf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...); xo_ssize_t xo_emit_f (xo_emit_flags_t flags, const char *fmt, ...); PRINTFLIKE(2, 0) static inline xo_ssize_t xo_emit_hvp (xo_handle_t *xop, const char *fmt, va_list vap) { return xo_emit_hv(xop, fmt, vap); } PRINTFLIKE(2, 3) static inline xo_ssize_t xo_emit_hp (xo_handle_t *xop, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hv(xop, fmt, vap); va_end(vap); return rc; } PRINTFLIKE(1, 2) static inline xo_ssize_t xo_emit_p (const char *fmt, ...) 
{ va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hv(NULL, fmt, vap); va_end(vap); return rc; } PRINTFLIKE(3, 0) static inline xo_ssize_t xo_emit_hvfp (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, va_list vap) { return xo_emit_hvf(xop, flags, fmt, vap); } PRINTFLIKE(3, 4) static inline xo_ssize_t xo_emit_hfp (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hvf(xop, flags, fmt, vap); va_end(vap); return rc; } PRINTFLIKE(2, 3) static inline xo_ssize_t xo_emit_fp (xo_emit_flags_t flags, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hvf(NULL, flags, fmt, vap); va_end(vap); return rc; } xo_ssize_t xo_open_container_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name); xo_ssize_t xo_open_container_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_container (const char *name); xo_ssize_t xo_open_container_hd (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_container_d (const char *name); xo_ssize_t xo_close_container_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_container (const char *name); xo_ssize_t xo_close_container_hd (xo_handle_t *xop); xo_ssize_t xo_close_container_d (void); xo_ssize_t xo_open_list_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name); xo_ssize_t xo_open_list_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_list (const char *name); xo_ssize_t xo_open_list_hd (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_list_d (const char *name); xo_ssize_t xo_close_list_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_list (const char *name); xo_ssize_t xo_close_list_hd (xo_handle_t *xop); xo_ssize_t xo_close_list_d (void); xo_ssize_t xo_open_instance_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name); xo_ssize_t xo_open_instance_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_instance (const char *name); xo_ssize_t xo_open_instance_hd (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_instance_d (const char *name); xo_ssize_t xo_close_instance_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_instance (const char *name); xo_ssize_t xo_close_instance_hd (xo_handle_t *xop); xo_ssize_t xo_close_instance_d (void); xo_ssize_t xo_open_marker_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_marker (const char *name); xo_ssize_t xo_close_marker_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_marker (const char *name); xo_ssize_t xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...); xo_ssize_t xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap); xo_ssize_t xo_attr (const char *name, const char *fmt, ...); void xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap); void xo_error_h (xo_handle_t *xop, const char *fmt, ...); void xo_error (const char *fmt, ...); +void +xo_errorn_hv (xo_handle_t *xop, int need_newline, const char *fmt, va_list vap); + +void +xo_errorn_h (xo_handle_t *xop, const char *fmt, ...); + +void +xo_errorn (const char *fmt, ...); + xo_ssize_t xo_flush_h (xo_handle_t *xop); xo_ssize_t xo_flush (void); xo_ssize_t xo_finish_h (xo_handle_t *xop); xo_ssize_t xo_finish (void); void xo_finish_atexit (void); void xo_set_leading_xpath (xo_handle_t *xop, const char *path); void xo_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...) PRINTFLIKE(3, 4); void xo_warn_c (int code, const char *fmt, ...) PRINTFLIKE(2, 3); void xo_warn (const char *fmt, ...) 
PRINTFLIKE(1, 2); void xo_warnx (const char *fmt, ...) PRINTFLIKE(1, 2); void xo_err (int eval, const char *fmt, ...) NORETURN PRINTFLIKE(2, 3); void xo_errx (int eval, const char *fmt, ...) NORETURN PRINTFLIKE(2, 3); void xo_errc (int eval, int code, const char *fmt, ...) NORETURN PRINTFLIKE(3, 4); void xo_message_hcv (xo_handle_t *xop, int code, const char *fmt, va_list vap) PRINTFLIKE(3, 0); void xo_message_hc (xo_handle_t *xop, int code, const char *fmt, ...) PRINTFLIKE(3, 4); void xo_message_c (int code, const char *fmt, ...) PRINTFLIKE(2, 3); void xo_message_e (const char *fmt, ...) PRINTFLIKE(1, 2); void xo_message (const char *fmt, ...) PRINTFLIKE(1, 2); void xo_emit_warn_hcv (xo_handle_t *xop, int as_warning, int code, const char *fmt, va_list vap); void xo_emit_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...); void xo_emit_warn_c (int code, const char *fmt, ...); void xo_emit_warn (const char *fmt, ...); void xo_emit_warnx (const char *fmt, ...); void xo_emit_err (int eval, const char *fmt, ...) NORETURN; void xo_emit_errx (int eval, const char *fmt, ...) NORETURN; void xo_emit_errc (int eval, int code, const char *fmt, ...) NORETURN; PRINTFLIKE(4, 0) static inline void xo_emit_warn_hcvp (xo_handle_t *xop, int as_warning, int code, const char *fmt, va_list vap) { xo_emit_warn_hcv(xop, as_warning, code, fmt, vap); } PRINTFLIKE(3, 4) static inline void xo_emit_warn_hcp (xo_handle_t *xop, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(xop, 1, code, fmt, vap); va_end(vap); } PRINTFLIKE(2, 3) static inline void xo_emit_warn_cp (int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } PRINTFLIKE(1, 2) static inline void xo_emit_warn_p (const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } PRINTFLIKE(1, 2) static inline void xo_emit_warnx_p (const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, -1, fmt, vap); va_end(vap); } NORETURN PRINTFLIKE(2, 3) static inline void xo_emit_err_p (int eval, const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 0, code, fmt, vap); va_end(vap); exit(eval); } PRINTFLIKE(2, 3) static inline void xo_emit_errx_p (int eval, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 0, -1, fmt, vap); va_end(vap); exit(eval); } PRINTFLIKE(3, 4) static inline void xo_emit_errc_p (int eval, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 0, code, fmt, vap); va_end(vap); exit(eval); } void xo_emit_err_v (int eval, int code, const char *fmt, va_list vap) NORETURN PRINTFLIKE(3, 0); void xo_no_setlocale (void); /** * @brief Lift libxo-specific arguments from a set of arguments * * libxo-enabled programs typically use command line options to enable * all the nifty-cool libxo features. xo_parse_args() makes this simple * by pre-processing the command line arguments given to main(), handling * and removing the libxo-specific ones, meaning anything starting with * "--libxo". A full description of these arguments is in the base * documentation. * @param[in] argc Number of arguments (ala #main()) * @param[in] argv Array of argument strings (ala #main()) * @return New number of arguments, or -1 for failure.
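 *
 * Typical usage is a sketch like:
 *
 *     int
 *     main (int argc, char **argv)
 *     {
 *         argc = xo_parse_args(argc, argv);
 *         if (argc < 0)
 *             exit(EXIT_FAILURE);
 *         ...
 *     }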
*/ int xo_parse_args (int argc, char **argv); /** * This is the "magic" number returned by libxo-supporting commands * when passed the equally magic "--libxo-check" option. If you * return this, we can (unsafely) assume that since you know the magic * handshake, you'll happily handle future --libxo options and not do * something violent like reboot the box or create another hole in the * ozone layer. */ #define XO_HAS_LIBXO 121 /** * externs for libxo's version number strings */ extern const char xo_version[]; /** Base version triple string */ extern const char xo_version_extra[]; /** Extra version magic content */ /** * @brief Dump the internal stack of a libxo handle. * * This diagnostic function is something I will ask you to call from * your program when you write to tell me libxo has gone bat-stink * crazy and has discarded your list or container or content. Output * content will be what we lovingly call "developer entertainment". * @param[in] xop A valid libxo handle, or NULL for the default handle */ void xo_dump_stack (xo_handle_t *xop); /** * @brief Record the name of the program, suitable for error output. * * libxo will record the given name for use while generating error * messages. The contents are not copied, so the value must continue * to point to a valid memory location. This allows the caller to change * the value, but requires the caller to manage the memory. Typically * this is called with argv[0] from main(). * @param[in] name The name of the current application program */ void xo_set_program (const char *name); /** * @brief Add a version string to the output, where possible. * * Adds a version number to the output, suitable for tracking * changes in the content. This is only important for the "encoding" * format styles (XML and JSON) and allows a user of the data to * discern which version of the data model is in use. * @param[in] version The version number, encoded as a string */ void xo_set_version (const char *version); /** * #xo_set_version with a handle.
* @param[in] xop A valid libxo handle, or NULL for the default handle * @param[in] version The version number, encoded as a string */ void xo_set_version_h (xo_handle_t *xop, const char *version); void xo_open_log (const char *ident, int logopt, int facility); void xo_close_log (void); int xo_set_logmask (int maskpri); void xo_set_unit_test_mode (int value); void xo_syslog (int priority, const char *name, const char *message, ...); void xo_vsyslog (int priority, const char *name, const char *message, va_list args); typedef void (*xo_syslog_open_t)(void); typedef void (*xo_syslog_send_t)(const char *full_msg, const char *v0_hdr, const char *text_only); typedef void (*xo_syslog_close_t)(void); void xo_set_syslog_handler (xo_syslog_open_t open_func, xo_syslog_send_t send_func, xo_syslog_close_t close_func); void xo_set_syslog_enterprise_id (unsigned short eid); typedef void (*xo_simplify_field_func_t)(const char *, unsigned, int); char * xo_simplify_format (xo_handle_t *xop, const char *fmt, int with_numbers, xo_simplify_field_func_t field_cb); xo_ssize_t xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, va_list vap); xo_ssize_t xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...); xo_ssize_t xo_emit_field (const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...); void xo_retain_clear_all (void); void xo_retain_clear (const char *fmt); #endif /* INCLUDE_XO_H */ Index: projects/clang1000-import/contrib/libxo/libxo/xo_encoder.c =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/xo_encoder.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/xo_encoder.c (revision 357179) @@ -1,400 +1,417 @@ /* * Copyright (c) 2015, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, August 2015 */ /** * libxo includes a number of fixed encoding styles, but other, * external encoders are needed to handle new encodings. Rather * than expose a swarm of libxo internals, we provide a distinct * API that is simpler than the one we use internally.
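 *
 * A minimal encoder callback is just a function over the operations it
 * receives (a sketch; the handler name is illustrative):
 *
 *     static int
 *     dump_handler (XO_ENCODER_HANDLER_ARGS)
 *     {
 *         fprintf(stderr, "op %s: '%s' '%s'\n",
 *             xo_encoder_op_name(op), name ?: "", value ?: "");
 *         return 0;
 *     }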
*/ #include #include #include #include #include #include #include "xo_config.h" #include "xo.h" #include "xo_encoder.h" #ifdef HAVE_DLFCN_H #include #if !defined(HAVE_DLFUNC) #define dlfunc(_p, _n) dlsym(_p, _n) #endif #else /* HAVE_DLFCN_H */ #define dlopen(_n, _f) NULL /* Fail */ #define dlsym(_p, _n) NULL /* Fail */ #define dlfunc(_p, _n) NULL /* Fail */ #endif /* HAVE_DLFCN_H */ static void xo_encoder_setup (void); /* Forward decl */ /* * Need a simple string collection */ typedef struct xo_string_node_s { TAILQ_ENTRY(xo_string_node_s) xs_link; /* Next string */ char xs_data[0]; /* String data */ } xo_string_node_t; typedef TAILQ_HEAD(xo_string_list_s, xo_string_node_s) xo_string_list_t; static inline void xo_string_list_init (xo_string_list_t *listp) { if (listp->tqh_last == NULL) TAILQ_INIT(listp); } static inline xo_string_node_t * xo_string_add (xo_string_list_t *listp, const char *str) { if (listp == NULL || str == NULL) return NULL; xo_string_list_init(listp); size_t len = strlen(str); xo_string_node_t *xsp; xsp = xo_realloc(NULL, sizeof(*xsp) + len + 1); if (xsp) { memcpy(xsp->xs_data, str, len); xsp->xs_data[len] = '\0'; TAILQ_INSERT_TAIL(listp, xsp, xs_link); } return xsp; } #define XO_STRING_LIST_FOREACH(_xsp, _listp) \ xo_string_list_init(_listp); \ TAILQ_FOREACH(_xsp, _listp, xs_link) static inline void xo_string_list_clean (xo_string_list_t *listp) { xo_string_node_t *xsp; xo_string_list_init(listp); for (;;) { xsp = TAILQ_FIRST(listp); if (xsp == NULL) break; TAILQ_REMOVE(listp, xsp, xs_link); xo_free(xsp); } } static xo_string_list_t xo_encoder_path; void xo_encoder_path_add (const char *path) { xo_encoder_setup(); if (path) xo_string_add(&xo_encoder_path, path); } /* ---------------------------------------------------------------------- */ typedef struct xo_encoder_node_s { TAILQ_ENTRY(xo_encoder_node_s) xe_link; /* Next session */ char *xe_name; /* Name for this encoder */ xo_encoder_func_t xe_handler; /* Callback function */ void *xe_dlhandle; /* dlopen handle */ } xo_encoder_node_t; typedef TAILQ_HEAD(xo_encoder_list_s, xo_encoder_node_s) xo_encoder_list_t; #define XO_ENCODER_LIST_FOREACH(_xep, _listp) \ xo_encoder_list_init(_listp); \ TAILQ_FOREACH(_xep, _listp, xe_link) static xo_encoder_list_t xo_encoders; static void xo_encoder_list_init (xo_encoder_list_t *listp) { if (listp->tqh_last == NULL) TAILQ_INIT(listp); } static xo_encoder_node_t * xo_encoder_list_add (const char *name) { if (name == NULL) return NULL; xo_encoder_node_t *xep = xo_realloc(NULL, sizeof(*xep)); if (xep) { ssize_t len = strlen(name) + 1; xep->xe_name = xo_realloc(NULL, len); if (xep->xe_name == NULL) { xo_free(xep); return NULL; } memcpy(xep->xe_name, name, len); TAILQ_INSERT_TAIL(&xo_encoders, xep, xe_link); } return xep; } void xo_encoders_clean (void) { xo_encoder_node_t *xep; xo_encoder_setup(); for (;;) { xep = TAILQ_FIRST(&xo_encoders); if (xep == NULL) break; TAILQ_REMOVE(&xo_encoders, xep, xe_link); if (xep->xe_dlhandle) dlclose(xep->xe_dlhandle); xo_free(xep); } xo_string_list_clean(&xo_encoder_path); } static void xo_encoder_setup (void) { static int initted; if (!initted) { initted = 1; xo_string_list_init(&xo_encoder_path); xo_encoder_list_init(&xo_encoders); xo_encoder_path_add(XO_ENCODERDIR); } } static xo_encoder_node_t * xo_encoder_find (const char *name) { xo_encoder_node_t *xep; xo_encoder_list_init(&xo_encoders); XO_ENCODER_LIST_FOREACH(xep, &xo_encoders) { if (xo_streq(xep->xe_name, name)) return xep; } return NULL; } static xo_encoder_node_t * xo_encoder_discover (const 
char *name) { void *dlp = NULL; char buf[MAXPATHLEN]; xo_string_node_t *xsp; xo_encoder_node_t *xep = NULL; XO_STRING_LIST_FOREACH(xsp, &xo_encoder_path) { static const char fmt[] = "%s/%s.enc"; char *dir = xsp->xs_data; size_t len = snprintf(buf, sizeof(buf), fmt, dir, name); if (len >= sizeof(buf)) /* Should not occur */ continue; dlp = dlopen((const char *) buf, RTLD_NOW); if (dlp) break; } if (dlp) { /* * If the library exists, find the initializer function and * call it. */ xo_encoder_init_func_t func; func = (xo_encoder_init_func_t) dlfunc(dlp, XO_ENCODER_INIT_NAME); if (func) { xo_encoder_init_args_t xei; bzero(&xei, sizeof(xei)); xei.xei_version = XO_ENCODER_VERSION; ssize_t rc = func(&xei); if (rc == 0 && xei.xei_handler) { xep = xo_encoder_list_add(name); if (xep) { xep->xe_handler = xei.xei_handler; xep->xe_dlhandle = dlp; } } } if (xep == NULL) dlclose(dlp); } return xep; } void xo_encoder_register (const char *name, xo_encoder_func_t func) { xo_encoder_setup(); xo_encoder_node_t *xep = xo_encoder_find(name); if (xep) /* "We alla-ready got one" */ return; xep = xo_encoder_list_add(name); if (xep) xep->xe_handler = func; } void xo_encoder_unregister (const char *name) { xo_encoder_setup(); xo_encoder_node_t *xep = xo_encoder_find(name); if (xep) { TAILQ_REMOVE(&xo_encoders, xep, xe_link); xo_free(xep); } } int xo_encoder_init (xo_handle_t *xop, const char *name) { xo_encoder_setup(); - const char *opts = strchr(name, ':'); + char opts_char = '\0'; + const char *col_opts = strchr(name, ':'); + const char *plus_opts = strchr(name, '+'); + + /* + * Find the option-separating character (plus or colon) which + * appears first in the options string. + */ + const char *opts = (col_opts == NULL) ? plus_opts + : (plus_opts == NULL) ? col_opts + : (plus_opts < col_opts) ? plus_opts : col_opts; + if (opts) { + opts_char = *opts; + /* Make a writable copy of the name */ size_t len = strlen(name); char *copy = alloca(len + 1); memcpy(copy, name, len); copy[len] = '\0'; char *opts_copy = copy + (opts - name); /* Move to the separator */ *opts_copy++ = '\0'; /* Trim it off */ opts = opts_copy; /* Use copy as options */ name = copy; /* Use trimmed copy as name */ } /* Can't have names containing '/' or ':' */ if (strchr(name, '/') != NULL || strchr(name, ':') != NULL) { xo_failure(xop, "invalid encoder name: %s", name); return -1; } /* * First we look on the list of known (registered) encoders. * If we don't find it, we follow the set of paths to find * the encoding library. */ xo_encoder_node_t *xep = xo_encoder_find(name); if (xep == NULL) { xep = xo_encoder_discover(name); if (xep == NULL) { xo_failure(xop, "encoder not found: %s", name); return -1; } } xo_set_encoder(xop, xep->xe_handler); int rc = xo_encoder_handle(xop, XO_OP_CREATE, name, NULL, 0); if (rc == 0 && opts != NULL) { - rc = xo_encoder_handle(xop, XO_OP_OPTIONS, name, opts, 0); + xo_encoder_op_t op; + + /* Encoder API is limited, so we're stuck with two different options */ + op = (opts_char == '+') ? XO_OP_OPTIONS_PLUS : XO_OP_OPTIONS; + rc = xo_encoder_handle(xop, op, name, opts, 0); } return rc; } /* * A couple of function varieties here, to allow for multiple * use cases. This variant is for when the main program knows * its own encoder needs.
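 *
 * For example (a sketch), a program that always emits CBOR might do:
 *
 *     xo_handle_t *xop = xo_encoder_create("cbor", 0);
 *     if (xop == NULL)
 *         xo_errx(1, "cannot create cbor encoder");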
*/ xo_handle_t * xo_encoder_create (const char *name, xo_xof_flags_t flags) { xo_handle_t *xop; xop = xo_create(XO_STYLE_ENCODER, flags); if (xop) { if (xo_encoder_init(xop, name)) { xo_destroy(xop); xop = NULL; } } return xop; } int xo_encoder_handle (xo_handle_t *xop, xo_encoder_op_t op, const char *name, const char *value, xo_xff_flags_t flags) { void *private = xo_get_private(xop); xo_encoder_func_t func = xo_get_encoder(xop); if (func == NULL) return -1; return func(xop, op, name, value, private, flags); } const char * xo_encoder_op_name (xo_encoder_op_t op) { static const char *names[] = { /* 0 */ "unknown", /* 1 */ "create", /* 2 */ "open_container", /* 3 */ "close_container", /* 4 */ "open_list", /* 5 */ "close_list", /* 6 */ "open_leaf_list", /* 7 */ "close_leaf_list", /* 8 */ "open_instance", /* 9 */ "close_instance", /* 10 */ "string", /* 11 */ "content", /* 12 */ "finish", /* 13 */ "flush", /* 14 */ "destroy", /* 15 */ "attr", /* 16 */ "version", /* 17 */ "options", }; if (op >= sizeof(names) / sizeof(names[0])) return "unknown"; return names[op]; } Index: projects/clang1000-import/contrib/libxo/libxo/xo_encoder.h =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/xo_encoder.h (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/xo_encoder.h (revision 357179) @@ -1,169 +1,170 @@ /* * Copyright (c) 2015, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, August 2015 */ /* * NOTE WELL: This file is needed by software that implements an * external encoder for libxo that allows libxo data to be encoded in * new and bizarre formats. General libxo code should _never_ * include this header file. */ #ifndef XO_ENCODER_H #define XO_ENCODER_H #include /* * Expose libxo's memory allocation functions */ extern xo_realloc_func_t xo_realloc; extern xo_free_func_t xo_free; /* * Simple string comparison function (without the temptation * to forget the "== 0").
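 *
 * For example, xo_streq(name, "csv") reads as the equality test it is,
 * where the equivalent strcmp(name, "csv") == 0 is easy to mistype as
 * its own negation.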
*/ static inline int xo_streq (const char *one, const char *two) { return strcmp(one, two) == 0; } /* Flags for formatting functions */ typedef unsigned long xo_xff_flags_t; #define XFF_COLON (1<<0) /* Append a ":" */ #define XFF_COMMA (1<<1) /* Append a "," iff there's more output */ #define XFF_WS (1<<2) /* Append a blank */ #define XFF_ENCODE_ONLY (1<<3) /* Only emit for encoding styles (XML, JSON) */ #define XFF_QUOTE (1<<4) /* Force quotes */ #define XFF_NOQUOTE (1<<5) /* Force no quotes */ #define XFF_DISPLAY_ONLY (1<<6) /* Only emit for display styles (text, html) */ #define XFF_KEY (1<<7) /* Field is a key (for XPath) */ #define XFF_XML (1<<8) /* Force XML encoding style (for XPath) */ #define XFF_ATTR (1<<9) /* Escape value using attribute rules (XML) */ #define XFF_BLANK_LINE (1<<10) /* Emit a blank line */ #define XFF_NO_OUTPUT (1<<11) /* Do not make any output */ #define XFF_TRIM_WS (1<<12) /* Trim whitespace off encoded values */ #define XFF_LEAF_LIST (1<<13) /* A leaf-list (list of values) */ #define XFF_UNESCAPE (1<<14) /* Need to printf-style unescape the value */ #define XFF_HUMANIZE (1<<15) /* Humanize the value (for display styles) */ #define XFF_HN_SPACE (1<<16) /* Humanize: put space before suffix */ #define XFF_HN_DECIMAL (1<<17) /* Humanize: add one decimal place if <10 */ #define XFF_HN_1000 (1<<18) /* Humanize: use 1000, not 1024 */ #define XFF_GT_FIELD (1<<19) /* Call gettext() on a field */ #define XFF_GT_PLURAL (1<<20) /* Call dngettext to find plural form */ #define XFF_ARGUMENT (1<<21) /* Content provided via argument */ /* Flags to turn off when we don't want i18n processing */ #define XFF_GT_FLAGS (XFF_GT_FIELD | XFF_GT_PLURAL) typedef unsigned xo_encoder_op_t; /* Encoder operations; names are in xo_encoder.c:xo_encoder_op_name() */ #define XO_OP_UNKNOWN 0 #define XO_OP_CREATE 1 /* Called when the handle is init'd */ #define XO_OP_OPEN_CONTAINER 2 #define XO_OP_CLOSE_CONTAINER 3 #define XO_OP_OPEN_LIST 4 #define XO_OP_CLOSE_LIST 5 #define XO_OP_OPEN_LEAF_LIST 6 #define XO_OP_CLOSE_LEAF_LIST 7 #define XO_OP_OPEN_INSTANCE 8 #define XO_OP_CLOSE_INSTANCE 9 #define XO_OP_STRING 10 /* Quoted UTF-8 string */ #define XO_OP_CONTENT 11 /* Other content */ #define XO_OP_FINISH 12 /* Finish any pending output */ #define XO_OP_FLUSH 13 /* Flush any buffered output */ #define XO_OP_DESTROY 14 /* Clean up function */ #define XO_OP_ATTRIBUTE 15 /* Attribute name/value */ #define XO_OP_VERSION 16 /* Version string */ #define XO_OP_OPTIONS 17 /* Additional command line options */ +#define XO_OP_OPTIONS_PLUS 18 /* Additional command line options */ #define XO_ENCODER_HANDLER_ARGS \ xo_handle_t *xop __attribute__ ((__unused__)), \ xo_encoder_op_t op __attribute__ ((__unused__)), \ const char *name __attribute__ ((__unused__)), \ const char *value __attribute__ ((__unused__)), \ void *private __attribute__ ((__unused__)), \ xo_xff_flags_t flags __attribute__ ((__unused__)) typedef int (*xo_encoder_func_t)(XO_ENCODER_HANDLER_ARGS); typedef struct xo_encoder_init_args_s { unsigned xei_version; /* Current version */ xo_encoder_func_t xei_handler; /* Encoding handler */ } xo_encoder_init_args_t; #define XO_ENCODER_VERSION 1 /* Current version */ #define XO_ENCODER_INIT_ARGS \ xo_encoder_init_args_t *arg __attribute__ ((__unused__)) typedef int (*xo_encoder_init_func_t)(XO_ENCODER_INIT_ARGS); /* * Each encoder library must define a function named xo_encoder_init * that takes the arguments defined in XO_ENCODER_INIT_ARGS. It * should return zero for success. 
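 *
 * A sketch of such an initializer (the handler name is illustrative):
 *
 *     int
 *     xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
 *     {
 *         arg->xei_handler = dump_handler;
 *         return 0;
 *     }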
*/ #define XO_ENCODER_INIT_NAME_TOKEN xo_encoder_library_init #define XO_STRINGIFY(_x) #_x #define XO_STRINGIFY2(_x) XO_STRINGIFY(_x) #define XO_ENCODER_INIT_NAME XO_STRINGIFY2(XO_ENCODER_INIT_NAME_TOKEN) extern int XO_ENCODER_INIT_NAME_TOKEN (XO_ENCODER_INIT_ARGS); void xo_encoder_register (const char *name, xo_encoder_func_t func); void xo_encoder_unregister (const char *name); void * xo_get_private (xo_handle_t *xop); void xo_encoder_path_add (const char *path); void xo_set_private (xo_handle_t *xop, void *opaque); xo_encoder_func_t xo_get_encoder (xo_handle_t *xop); void xo_set_encoder (xo_handle_t *xop, xo_encoder_func_t encoder); int xo_encoder_init (xo_handle_t *xop, const char *name); xo_handle_t * xo_encoder_create (const char *name, xo_xof_flags_t flags); int xo_encoder_handle (xo_handle_t *xop, xo_encoder_op_t op, const char *name, const char *value, xo_xff_flags_t flags); void xo_encoders_clean (void); const char * xo_encoder_op_name (xo_encoder_op_t op); /* * xo_failure is used to announce internal failures, when "warn" is on */ void xo_failure (xo_handle_t *xop, const char *fmt, ...); #endif /* XO_ENCODER_H */ Index: projects/clang1000-import/contrib/libxo/tests/core/Makefile.am =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/Makefile.am (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/Makefile.am (revision 357179) @@ -1,145 +1,148 @@ # # $Id$ # # Copyright 2014, Juniper Networks, Inc. # All rights reserved. # This SOFTWARE is licensed under the LICENSE provided in the # ../Copyright file. By downloading, installing, copying, or otherwise # using the SOFTWARE, you agree to be bound by the terms of that # LICENSE. AM_CFLAGS = -I${top_srcdir} -I${top_srcdir}/libxo # Ick: maintained by hand! 
TEST_CASES = \ test_01.c \ test_02.c \ test_03.c \ test_04.c \ test_05.c \ test_06.c \ test_07.c \ test_08.c \ test_09.c \ test_10.c \ test_11.c \ test_12.c test_01_test_SOURCES = test_01.c test_02_test_SOURCES = test_02.c test_03_test_SOURCES = test_03.c test_04_test_SOURCES = test_04.c test_05_test_SOURCES = test_05.c test_06_test_SOURCES = test_06.c test_07_test_SOURCES = test_07.c test_08_test_SOURCES = test_08.c test_09_test_SOURCES = test_09.c test_10_test_SOURCES = test_10.c test_11_test_SOURCES = test_11.c test_12_test_SOURCES = test_12.c # TEST_CASES := $(shell cd ${srcdir} ; echo *.c ) noinst_PROGRAMS = ${TEST_CASES:.c=.test} LDADD = \ ${top_builddir}/libxo/libxo.la if HAVE_HUMANIZE_NUMBER LDADD += -lutil endif EXTRA_DIST = \ ${TEST_CASES} \ ${addprefix saved/, ${TEST_CASES:.c=.T.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.T.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.XP.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.XP.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.JP.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.JP.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.HP.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.HP.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.X.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.X.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.J.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.J.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.H.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.H.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.HIPx.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.HIPx.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.E.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.E.out}} S2O = | ${SED} '1,/@@/d' all: valgrind: @echo '## Running the regression tests under Valgrind' ${MAKE} CHECKER='valgrind -q' tests #TEST_TRACE = set -x ; TEST_JIG = \ ${CHECKER} ./$$base.test --libxo$$xoopts ${TEST_OPTS} \ > out/$$base.$$fmt.out 2> out/$$base.$$fmt.err ; \ ${DIFF} -Nu ${srcdir}/saved/$$base.$$fmt.out out/$$base.$$fmt.out ${S2O} ; \ ${DIFF} -Nu ${srcdir}/saved/$$base.$$fmt.err out/$$base.$$fmt.err ${S2O} TEST_JIG2 = \ echo "... $$test ... $$fmt ..."; \ -xoopts==warn,encoder=csv$$csv ; \ +xoopts==warn,$$csv ; \ ${TEST_JIG}; true; TEST_FORMATS = T XP JP HP X J H HIPx test tests: ${bin_PROGRAMS} @${MKDIR} -p out -@ ${TEST_TRACE} (for test in ${TEST_CASES} ; do \ base=`${BASENAME} $$test .c` ; \ (for fmt in ${TEST_FORMATS}; do \ echo "... $$test ... $$fmt ..."; \ xoopts=:W$$fmt ; \ ${TEST_JIG}; \ true; \ done) ; \ (for fmt in E; do \ echo "... $$test ... $$fmt ..."; \ xoopts==warn,encoder=test ; \ ${TEST_JIG}; \ true; \ done) \ done) -@ (${TEST_TRACE} test=test_01.c; base=test_01; \ - ( fmt=Ecsv1; csv= ; ${TEST_JIG2} ); \ - ( fmt=Ecsv2; csv=:path=top/data/item+no-header ; ${TEST_JIG2} ); \ - ( fmt=Ecsv3; csv=:path=item+leafs=sku.sold+no-quotes ; ${TEST_JIG2} ); \ + ( fmt=Ecsv1; csv=encoder=csv ; \ + ${TEST_JIG2} ); \ + ( fmt=Ecsv2; csv=encoder=csv:path=top/data/item:no-header ; \ + ${TEST_JIG2} ); \ + ( fmt=Ecsv3; csv=@csv:path=item:leafs=sku.sold:no-quotes ; \ + ${TEST_JIG2} ); \ ) one: -@(test=${TEST_CASE}; data=${TEST_DATA}; ${TEST_ONE} ; true) accept: -@(for test in ${TEST_CASES} ; do \ base=`${BASENAME} $$test .c` ; \ (for fmt in ${TEST_FORMATS} E ; do \ echo "... $$test ... $$fmt ..."; \ ${CP} out/$$base.$$fmt.out ${srcdir}/saved/$$base.$$fmt.out ; \ ${CP} out/$$base.$$fmt.err ${srcdir}/saved/$$base.$$fmt.err ; \ done) \ done) -@(test=test_01.c; base=test_01; for fmt in Ecsv1 Ecsv2 Ecsv3 ; do \ echo "... $$test ... 
$$fmt ..."; \ ${CP} out/$$base.$$fmt.out ${srcdir}/saved/$$base.$$fmt.out ; \ ${CP} out/$$base.$$fmt.err ${srcdir}/saved/$$base.$$fmt.err ; \ done) .c.test: $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -o $@ $< CLEANFILES = ${TEST_CASES:.c=.test} CLEANDIRS = out clean-local: rm -rf ${CLEANDIRS} Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.H.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.H.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.H.out (revision 357179) @@ -1,7 +1,10 @@
em0
em0
We are
{emit}
{ting}
some
braces
abcdef
abcdef: Bad file descriptor
improper use of profanity; ten yard penalty; first down
length
abcdef
close
-1
returned
Bad file descriptor
good
close
-1
returned
Bad fi
good
improper use of profanity; ten yard penalty; first down
20
30
40
file
0
bytes
1
byte
2
bytes
3
bytes
4
bytes
10
/
20
/
30
mbufs <&> in use (current/cache/total)
50
from
Boston
64
left out of
640
64
left out of
640
beforeworkingafter:
string
:
10
11
1010
packets here/there/everywhere
1010
packets here/there/everywhere
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
Humanize:
21
,
57 K
,
96M
,
44M
,
1.2G
one
two
three
(null)
1:
1000
2:
test5000
3:
ten-longx
4:
xtest
this is an error
two more errors
this is an warning
two more warnings
V1/V2 packets
:
10
0004
tries
improper use of profanity; ten yard penalty; first down
Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<> +
err message (1)
err message (2) +
err message (1) +
err message (2)
\ No newline at end of file Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HIPx.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HIPx.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HIPx.out (revision 357179) @@ -1,227 +1,242 @@
em0
em0
We are
{emit}
{ting}
some
braces
abcdef
abcdef: Bad file descriptor
improper use of profanity; ten yard penalty; first down
length
abcdef
close
-1
returned
Bad file descriptor
good
close
-1
returned
Bad fi
good
improper use of profanity; ten yard penalty; first down
20
30
40
file
0
bytes
1
byte
2
bytes
3
bytes
4
bytes
10
/
20
/
30
mbufs <&> in use (current/cache/total)
50
from
Boston
64
left out of
640
64
left out of
640
beforeworkingafter:
string
:
10
11
1010
packets here/there/everywhere
1010
packets here/there/everywhere
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
Humanize:
21
,
57 K
,
96M
,
44M
,
1.2G
one
two
three
(null)
1:
1000
2:
test5000
3:
ten-longx
4:
xtest
this is an error
two more errors
this is an warning
two more warnings
V1/V2 packets
:
10
0004
tries
improper use of profanity; ten yard penalty; first down
Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<>
+
+
err message (1)
+
+
+
err message (2) +
+
+
+
err message (1) +
+
+
+
err message (2) +
+
Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HP.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HP.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HP.out (revision 357179) @@ -1,227 +1,242 @@
em0
em0
We are
{emit}
{ting}
some
braces
abcdef
abcdef: Bad file descriptor
improper use of profanity; ten yard penalty; first down
length
abcdef
close
-1
returned
Bad file descriptor
good
close
-1
returned
Bad fi
good
improper use of profanity; ten yard penalty; first down
20
30
40
file
0
bytes
1
byte
2
bytes
3
bytes
4
bytes
10
/
20
/
30
mbufs <&> in use (current/cache/total)
50
from
Boston
64
left out of
640
64
left out of
640
beforeworkingafter:
string
:
10
11
1010
packets here/there/everywhere
1010
packets here/there/everywhere
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
Humanize:
21
,
57 K
,
96M
,
44M
,
1.2G
one
two
three
(null)
1:
1000
2:
test5000
3:
ten-longx
4:
xtest
this is an error
two more errors
this is an warning
two more warnings
V1/V2 packets
:
10
0004
tries
improper use of profanity; ten yard penalty; first down
Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<>
+
+
err message (1)
+
+
+
err message (2) +
+
+
+
err message (1) +
+
+
+
err message (2) +
+
Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.J.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.J.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.J.out (revision 357179) @@ -1 +1 @@ -{"top": {"data": {"name":"em0","flags":"0x8843","name":"em0","flags":"0x8843","what":"braces","length":"abcdef","fd":-1,"error":"Bad file descriptor","test":"good","fd":-1,"error":"Bad fi","test":"good","lines":20,"words":30,"characters":40, "bytes": [0,1,2,3,4],"mbuf-current":10,"mbuf-cache":20,"mbuf-total":30,"distance":50,"location":"Boston","memory":64,"total":640,"memory":64,"total":640,"ten":10,"eleven":11,"unknown":1010,"unknown":1010,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"val1":21,"val2":58368,"val3":100663296,"val4":44470272,"val5":1342172800, "flag": ["one","two","three"],"works":null,"empty-tag":true,"t1":"1000","t2":"test5000","t3":"ten-longx","t4":"xtest", "__error": {"message":"this is an error"}, "__error": {"message":"two more errors"}, "__warning": {"message":"this is an warning"}, "__warning": {"message":"two more warnings"},"count":10,"test":4, "error": {"message":"Shut 'er down, Clancey! She's a-pumpin' mud! <>!,\"!<>\n"}}}} +{"top": {"data": {"name":"em0","flags":"0x8843","name":"em0","flags":"0x8843","what":"braces","length":"abcdef","fd":-1,"error":"Bad file descriptor","test":"good","fd":-1,"error":"Bad fi","test":"good","lines":20,"words":30,"characters":40, "bytes": [0,1,2,3,4],"mbuf-current":10,"mbuf-cache":20,"mbuf-total":30,"distance":50,"location":"Boston","memory":64,"total":640,"memory":64,"total":640,"ten":10,"eleven":11,"unknown":1010,"unknown":1010,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"val1":21,"val2":58368,"val3":100663296,"val4":44470272,"val5":1342172800, "flag": ["one","two","three"],"works":null,"empty-tag":true,"t1":"1000","t2":"test5000","t3":"ten-longx","t4":"xtest", "__error": {"message":"this is an error"}, "__error": {"message":"two more errors"}, "__warning": {"message":"this is an warning"}, "__warning": {"message":"two more warnings"},"count":10,"test":4, "error": {"message":"Shut 'er down, Clancey! She's a-pumpin' mud! 
<>!,\"!<>\n"}, "error": {"message":"err message (1)"}, "error": {"message":"err message (2)\n"}, "error": {"message":"err message (1)\n"}, "error": {"message":"err message (2)\n"}}}} Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.JP.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.JP.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.JP.out (revision 357179) @@ -1,86 +1,98 @@ { "top": { "data": { "name": "em0", "flags": "0x8843", "name": "em0", "flags": "0x8843", "what": "braces", "length": "abcdef", "fd": -1, "error": "Bad file descriptor", "test": "good", "fd": -1, "error": "Bad fi", "test": "good", "lines": 20, "words": 30, "characters": 40, "bytes": [ 0, 1, 2, 3, 4 ], "mbuf-current": 10, "mbuf-cache": 20, "mbuf-total": 30, "distance": 50, "location": "Boston", "memory": 64, "total": 640, "memory": 64, "total": 640, "ten": 10, "eleven": 11, "unknown": 1010, "unknown": 1010, "min": 15, "cur": 20, "max": 125, "min": 15, "cur": 20, "max": 125, "min": 15, "cur": 20, "max": 125, "min": 15, "cur": 20, "max": 125, "val1": 21, "val2": 58368, "val3": 100663296, "val4": 44470272, "val5": 1342172800, "flag": [ "one", "two", "three" ], "works": null, "empty-tag": true, "t1": "1000", "t2": "test5000", "t3": "ten-longx", "t4": "xtest", "__error": { "message": "this is an error" }, "__error": { "message": "two more errors" }, "__warning": { "message": "this is an warning" }, "__warning": { "message": "two more warnings" }, "count": 10, "test": 4, "error": { "message": "Shut 'er down, Clancey! She's a-pumpin' mud! <>!,\"!<>\n" + }, + "error": { + "message": "err message (1)" + }, + "error": { + "message": "err message (2)\n" + }, + "error": { + "message": "err message (1)\n" + }, + "error": { + "message": "err message (2)\n" } } } } Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.T.err =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.T.err (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.T.err (revision 357179) @@ -1,2 +1,5 @@ test_02: key field emitted after normal value field: 'name' Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<> +err message (1)err message (2) +err message (1) +err message (2) Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.X.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.X.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.X.out (revision 357179) @@ -1,7 +1,10 @@ em00x8843em00x8843bracesabcdef abcdef: Bad file descriptor improper use of profanity; ten yard penalty; first down abcdef-1Bad file descriptorgood-1Bad figoodimproper use of profanity; ten yard penalty; first down 2030400123410203050Boston646406464010111010101015201251520125152012515201252158368100663296444702721342172800onetwothreenull1000test5000ten-longxxtest<__error>this is an error<__error>two more errors<__warning>this is an warning<__warning>two more warnings104improper use of profanity; ten yard penalty; first down Shut 'er down, Clancey! She's a-pumpin' mud! 
<>!,"!<> +err message (1)err message (2) +err message (1) +err message (2) \ No newline at end of file Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.XP.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.XP.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.XP.out (revision 357179) @@ -1,91 +1,106 @@ em0 0x8843 em0 0x8843 braces abcdef abcdef: Bad file descriptor improper use of profanity; ten yard penalty; first down abcdef -1 Bad file descriptor good -1 Bad fi good improper use of profanity; ten yard penalty; first down 20 30 40 0 1 2 3 4 10 20 30 50 Boston 64 640 64 640 10 11 1010 1010 15 20 125 15 20 125 15 20 125 15 20 125 21 58368 100663296 44470272 1342172800 one two three null 1000 test5000 ten-longx xtest <__error> this is an error <__error> two more errors <__warning> this is an warning <__warning> two more warnings 10 4 improper use of profanity; ten yard penalty; first down Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<> + + err message (1) + + + err message (2) + + + + err message (1) + + + + err message (2) + + Index: projects/clang1000-import/contrib/libxo/tests/core/test_02.c =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/test_02.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/test_02.c (revision 357179) @@ -1,155 +1,161 @@ /* * Copyright (c) 2014-2019, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. 
* Phil Shafer, July 2014 */ #include #include #include #include #include "xo.h" #include "xo_encoder.h" #include "xo_humanize.h" int main (int argc, char **argv) { + xo_set_program("test_02"); + argc = xo_parse_args(argc, argv); if (argc < 0) return 1; for (argc = 1; argv[argc]; argc++) { if (xo_streq(argv[argc], "xml")) xo_set_style(NULL, XO_STYLE_XML); else if (xo_streq(argv[argc], "json")) xo_set_style(NULL, XO_STYLE_JSON); else if (xo_streq(argv[argc], "text")) xo_set_style(NULL, XO_STYLE_TEXT); else if (xo_streq(argv[argc], "html")) xo_set_style(NULL, XO_STYLE_HTML); else if (xo_streq(argv[argc], "pretty")) xo_set_flags(NULL, XOF_PRETTY); else if (xo_streq(argv[argc], "xpath")) xo_set_flags(NULL, XOF_XPATH); else if (xo_streq(argv[argc], "info")) xo_set_flags(NULL, XOF_INFO); } xo_set_flags(NULL, XOF_UNITS); /* Always test w/ this */ xo_set_file(stdout); xo_open_container_h(NULL, "top"); xo_open_container("data"); xo_emit("{kt:name/%-*.*s}{eq:flags/0x%x}", 5, 5, "em0", 34883); xo_emit("{d:/%-*.*s}{etk:name}{eq:flags/0x%x}", 5, 5, "em0", "em0", 34883); xo_emit("We are {{emit}}{{ting}} some {:what}\n", "braces"); xo_message("abcdef"); close(-1); xo_message_e("abcdef"); xo_message("improper use of profanity; %s; %s", "ten yard penalty", "first down"); xo_emit("length {:length/%6.6s}\n", "abcdefghijklmnopqrstuvwxyz"); close(-1); xo_emit("close {:fd/%d} returned {:error/%m} {:test}\n", -1, "good"); close(-1); xo_emit("close {:fd/%d} returned {:error/%6.6m} {:test}\n", -1, "good"); xo_message("improper use of profanity; %s; %s", "ten yard penalty", "first down"); xo_emit(" {:lines/%7ju} {:words/%7ju} " "{:characters/%7ju} {d:filename/%s}\n", (uintmax_t) 20, (uintmax_t) 30, (uintmax_t) 40, "file"); int i; for (i = 0; i < 5; i++) xo_emit("{lw:bytes/%d}{Np:byte,bytes}\n", i); xo_emit("{:mbuf-current/%u}/{:mbuf-cache/%u}/{:mbuf-total/%u} " "{N:mbufs <&> in use (current\\/cache\\/total)}\n", 10, 20, 30); xo_emit("{:distance/%u}{Uw:miles} from {:location}\n", 50, "Boston"); xo_emit("{:memory/%u}{U:k} left out of {:total/%u}{U:kb}\n", 64, 640); xo_emit("{:memory/%u}{U:/%s} left out of {:total/%u}{U:/%s}\n", 64, "k", 640, "kilobytes"); xo_emit("{,title:/before%safter:}\n", "working"); xo_emit("{,display,white,colon:some/%s}" "{,value:ten/%ju}{,value:eleven/%ju}\n", "string", (uintmax_t) 10, (uintmax_t) 11); xo_emit("{:unknown/%u} " "{N:/packet%s here\\/there\\/everywhere}\n", 1010, "s"); xo_emit("{:unknown/%u} " "{,note:/packet%s here\\/there\\/everywhere}\n", 1010, "s"); xo_emit("({[:/%d}{n:min/15}/{n:cur/20}/{:max/%d}{]:})\n", 30, 125); xo_emit("({[:30}{:min/%u}/{:cur/%u}/{:max/%u}{]:})\n", 15, 20, 125); xo_emit("({[:-30}{n:min/15}/{n:cur/20}/{n:max/125}{]:})\n"); xo_emit("({[:}{:min/%u}/{:cur/%u}/{:max/%u}{]:/%d})\n", 15, 20, 125, -30); xo_emit("Humanize: {h:val1/%u}, {h,hn-space:val2/%u}, " "{h,hn-decimal:val3/%u}, {h,hn-1000:val4/%u}, " "{h,hn-decimal:val5/%u}\n", 21, 57 * 1024, 96 * 1024 * 1024, (42 * 1024 + 420) * 1024, 1342172800); xo_open_list("flag"); xo_emit("{lq:flag/one} {lq:flag/two} {lq:flag/three}\n"); xo_close_list("flag"); xo_emit("{n:works/%s}\n", NULL); xo_emit("{e:empty-tag/}"); xo_emit("1:{qt:t1/%*d} 2:{qt:t2/test%-*u} " "3:{qt:t3/%10sx} 4:{qt:t4/x%-*.*s}\n", 6, 1000, 8, 5000, "ten-long", 10, 10, "test"); xo_emit("{E:this is an error}\n"); xo_emit("{E:/%s more error%s}\n", "two", "s" ); xo_emit("{W:this is an warning}\n"); xo_emit("{W:/%s more warning%s}\n", "two", "s" ); xo_emit("{L:/V1\\/V2 packet%s}: {:count/%u}\n", "s", 10); int test = 4; xo_emit("{:test/%04d} 
{L:/tr%s}\n", test, (test == 1) ? "y" : "ies"); xo_message("improper use of profanity; %s; %s", "ten yard penalty", "first down"); xo_error("Shut 'er down, Clancey! She's a-pumpin' mud! <>!,\"!<>\n"); + xo_error("err message (%d)", 1); + xo_error("err message (%d)\n", 2); + xo_errorn("err message (%d)", 1); + xo_errorn("err message (%d)\n", 2); xo_close_container("data"); xo_close_container_h(NULL, "top"); xo_finish(); return 0; } Index: projects/clang1000-import/contrib/libxo/tests/core/test_12.c =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/test_12.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/test_12.c (revision 357179) @@ -1,93 +1,95 @@ /* * Copyright (c) 2014, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, July 2014 */ #include #include #include #include #include "xo_config.h" #include "xo.h" #include "xo_encoder.h" int main (int argc, char **argv) { int i, count = 10; int mon = 0; xo_emit_flags_t flags = XOEF_RETAIN; int opt_color = 1; + xo_set_program("test_12"); + argc = xo_parse_args(argc, argv); if (argc < 0) return 1; for (argc = 1; argv[argc]; argc++) { if (xo_streq(argv[argc], "xml")) xo_set_style(NULL, XO_STYLE_XML); else if (xo_streq(argv[argc], "json")) xo_set_style(NULL, XO_STYLE_JSON); else if (xo_streq(argv[argc], "text")) xo_set_style(NULL, XO_STYLE_TEXT); else if (xo_streq(argv[argc], "html")) xo_set_style(NULL, XO_STYLE_HTML); else if (xo_streq(argv[argc], "no-color")) opt_color = 0; else if (xo_streq(argv[argc], "pretty")) xo_set_flags(NULL, XOF_PRETTY); else if (xo_streq(argv[argc], "xpath")) xo_set_flags(NULL, XOF_XPATH); else if (xo_streq(argv[argc], "info")) xo_set_flags(NULL, XOF_INFO); else if (xo_streq(argv[argc], "no-retain")) flags &= ~XOEF_RETAIN; else if (xo_streq(argv[argc], "big")) { if (argv[argc + 1]) count = atoi(argv[++argc]); } } xo_set_flags(NULL, XOF_UNITS); /* Always test w/ this */ if (opt_color) xo_set_flags(NULL, XOF_COLOR); /* Force color output */ xo_set_file(stdout); xo_open_container("top"); xo_open_container("data"); xo_emit("{C:fg-red,bg-green}Merry XMas!!{C:}\n"); xo_emit("One {C:fg-yellow,bg-blue}{:animal}{C:}, " "Two {C:fg-green,bg-yellow}{:animal}{C:}\n", "fish", "fish"); const char *fmt1 = "The {C:fg-red}{k:name}{C:reset} is " "{C:/fg-%s}{:color}{C:reset} til {:time/%02d:%02d}\n"; const char *fmt2 = "My {C:fg-red}{:hand}{C:reset} hand is " "{C:/fg-%s}{:color}{C:reset} til {:time/%02d:%02d}\n"; for (i = 0; i < count; i++) { xo_open_instance("thing"); xo_emit_f(flags, fmt1, "thing", "green", "green", 2, 15); xo_emit_f(flags, fmt2, "left", "blue", "blue", 3, 45); } xo_open_container("2by4"); xo_emit("There is {:4x4} in {:2morrow}\n", "truck", "tomorrow"); xo_close_container("2by4"); xo_close_container("data"); xo_close_container_h(NULL, "top"); xo_finish(); return 0; } Index: projects/clang1000-import/contrib/libxo =================================================================== --- projects/clang1000-import/contrib/libxo (revision 357178) +++ projects/clang1000-import/contrib/libxo (revision 357179) Property changes on: projects/clang1000-import/contrib/libxo ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,2 ## Merged 
/vendor/Juniper/libxo/dist:r354455-357124 Merged /head/contrib/libxo:r356848-357178 Index: projects/clang1000-import/lib/libxo/add.man =================================================================== --- projects/clang1000-import/lib/libxo/add.man (revision 357178) +++ projects/clang1000-import/lib/libxo/add.man (revision 357179) @@ -1,30 +1,30 @@ .\" $FreeBSD$ .Sh ADDITIONAL DOCUMENTATION .Fx uses .Nm libxo -version 1.3.1. +version 1.4.0. Complete documentation can be found on github: .Bd -literal -offset indent -https://juniper.github.io/libxo/1.3.1/html/index.html +https://juniper.github.io/libxo/1.4.0/html/index.html .Ed .Pp .Nm libxo lives on github as: .Bd -literal -offset indent https://github.com/Juniper/libxo .Ed .Pp The latest release of .Nm libxo is available at: .Bd -literal -offset indent https://github.com/Juniper/libxo/releases .Ed .Sh HISTORY The .Nm libxo library was added in .Fx 11.0 . .Sh AUTHOR Phil Shafer Index: projects/clang1000-import/lib/libxo/libxo/xo_config.h =================================================================== --- projects/clang1000-import/lib/libxo/libxo/xo_config.h (revision 357178) +++ projects/clang1000-import/lib/libxo/libxo/xo_config.h (revision 357179) @@ -1,257 +1,257 @@ /* $FreeBSD$ */ /* libxo/xo_config.h. Generated from xo_config.h.in by configure. */ /* libxo/xo_config.h.in. Generated from configure.ac by autoheader. */ /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP systems. This function is required for `alloca.c' support on those systems. */ /* #undef CRAY_STACKSEG_END */ /* Define to 1 if using `alloca.c'. */ /* #undef C_ALLOCA */ /* Define to 1 if you have `alloca', as a function or macro. */ #define HAVE_ALLOCA 1 /* Define to 1 if you have and it should be used (not on Ultrix). */ /* #undef HAVE_ALLOCA_H */ /* Define to 1 if you have the `asprintf' function. */ #define HAVE_ASPRINTF 1 /* Define to 1 if you have the `bzero' function. */ #define HAVE_BZERO 1 /* Define to 1 if you have the `ctime' function. */ #define HAVE_CTIME 1 /* Define to 1 if you have the header file. */ #define HAVE_CTYPE_H 1 /* Define to 1 if you have the declaration of `__isthreaded', and to 0 if you don't. */ #define HAVE_DECL___ISTHREADED 1 /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 /* Define to 1 if you have the `dlfunc' function. */ #define HAVE_DLFUNC 1 /* Define to 1 if you have the header file. */ #define HAVE_ERRNO_H 1 /* Define to 1 if you have the `fdopen' function. */ #define HAVE_FDOPEN 1 /* Define to 1 if you have the `flock' function. */ #define HAVE_FLOCK 1 /* Define to 1 if you have the `getpass' function. */ #define HAVE_GETPASS 1 /* Define to 1 if you have the `getprogname' function. */ #define HAVE_GETPROGNAME 1 /* Define to 1 if you have the `getrusage' function. */ #define HAVE_GETRUSAGE 1 /* gettext(3) */ /* #undef HAVE_GETTEXT */ /* Define to 1 if you have the `gettimeofday' function. */ #define HAVE_GETTIMEOFDAY 1 /* humanize_number(3) */ #define HAVE_HUMANIZE_NUMBER 1 /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 /* Define to 1 if you have the `crypto' library (-lcrypto). */ #define HAVE_LIBCRYPTO 1 /* Define to 1 if you have the `m' library (-lm). */ #define HAVE_LIBM 1 /* Define to 1 if you have the header file. */ #define HAVE_LIBUTIL_H 1 /* Define to 1 if your system has a GNU libc compatible `malloc' function, and to 0 otherwise. */ #define HAVE_MALLOC 1 /* Define to 1 if you have the `memmove' function. 
*/ #define HAVE_MEMMOVE 1 /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_MONITOR_H */ /* Support printflike */ /* #undef HAVE_PRINTFLIKE */ /* Define to 1 if your system has a GNU libc compatible `realloc' function, and to 0 otherwise. */ #define HAVE_REALLOC 1 /* Define to 1 if you have the `srand' function. */ #define HAVE_SRAND 1 /* Define to 1 if you have the `sranddev' function. */ #define HAVE_SRANDDEV 1 /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_STDIO_EXT_H */ /* Define to 1 if you have the header file. */ #define HAVE_STDIO_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_STDTIME_TZFILE_H */ /* Define to 1 if you have the `strchr' function. */ #define HAVE_STRCHR 1 /* Define to 1 if you have the `strcspn' function. */ #define HAVE_STRCSPN 1 /* Define to 1 if you have the `strerror' function. */ #define HAVE_STRERROR 1 /* Define to 1 if you have the header file. */ #define HAVE_STRINGS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 /* Define to 1 if you have the `strlcpy' function. */ #define HAVE_STRLCPY 1 /* Define to 1 if you have the `strspn' function. */ #define HAVE_STRSPN 1 /* Have struct sockaddr_un.sun_len */ #define HAVE_SUN_LEN 1 /* Define to 1 if you have the `sysctlbyname' function. */ #define HAVE_SYSCTLBYNAME 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_PARAM_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_SYSCTL_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TIME_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if you have the header file. */ #define HAVE_THREADS_H 1 /* thread-local setting */ #define HAVE_THREAD_LOCAL THREAD_LOCAL_before /* Define to 1 if you have the header file. */ /* #undef HAVE_TZFILE_H */ /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 /* Define to 1 if you have the `__flbf' function. */ /* #undef HAVE___FLBF */ /* Enable debugging */ /* #undef LIBXO_DEBUG */ /* Enable text-only rendering */ /* #undef LIBXO_TEXT_ONLY */ /* Version number as dotted value */ -#define LIBXO_VERSION "1.3.1" +#define LIBXO_VERSION "1.4.0" /* Version number extra information */ #define LIBXO_VERSION_EXTRA "" /* Version number as a number */ -#define LIBXO_VERSION_NUMBER 1003001 +#define LIBXO_VERSION_NUMBER 1004000 /* Version number as string */ -#define LIBXO_VERSION_STRING "1003001" +#define LIBXO_VERSION_STRING "1004000" /* Enable local wcwidth implementation */ #define LIBXO_WCWIDTH 1 /* Define to the sub-directory where libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" /* Name of package */ #define PACKAGE "libxo" /* Define to the address where bug reports for this package should be sent. */ #define PACKAGE_BUGREPORT "phil@juniper.net" /* Define to the full name of this package. */ #define PACKAGE_NAME "libxo" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "libxo 1.3.1" +#define PACKAGE_STRING "libxo 1.4.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "libxo" /* Define to the home page for this package. */ #define PACKAGE_URL "" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "1.3.1" +#define PACKAGE_VERSION "1.4.0" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be automatically deduced at runtime. STACK_DIRECTION > 0 => grows toward higher addresses STACK_DIRECTION < 0 => grows toward lower addresses STACK_DIRECTION = 0 => direction of growth unknown */ /* #undef STACK_DIRECTION */ /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Use int return codes */ /* #undef USE_INT_RETURN_CODES */ /* Version number of package */ -#define VERSION "1.3.1" +#define VERSION "1.4.0" /* Retain hash bucket size */ /* #undef XO_RETAIN_SIZE */ /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus /* #undef inline */ #endif /* Define to rpl_malloc if the replacement function should be used. */ /* #undef malloc */ /* Define to rpl_realloc if the replacement function should be used. */ /* #undef realloc */ /* Define to `unsigned int' if does not define. */ /* #undef size_t */ Index: projects/clang1000-import/sbin/newfs_msdos/mkfs_msdos.c =================================================================== --- projects/clang1000-import/sbin/newfs_msdos/mkfs_msdos.c (revision 357178) +++ projects/clang1000-import/sbin/newfs_msdos/mkfs_msdos.c (revision 357179) @@ -1,1000 +1,1033 @@ /* * Copyright (c) 1998 Robert Nordier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
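The xo_config.h hunk above bumps LIBXO_VERSION_NUMBER from 1003001 to 1004000 alongside the 1.3.1 -> 1.4.0 version strings, which is consistent with a major * 1000000 + minor * 1000 + patch encoding. A hypothetical helper (not part of this commit) illustrating that reading:

#include <assert.h>

/* Encode a dotted libxo version as one comparable number,
 * matching the values seen in the xo_config.h hunk above. */
static long
libxo_version_number(int major, int minor, int patch)
{
	return (major * 1000000L + minor * 1000L + patch);
}

int
main(void)
{
	assert(libxo_version_number(1, 3, 1) == 1003001);	/* old value */
	assert(libxo_version_number(1, 4, 0) == 1004000);	/* new value */
	return (0);
}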
*/ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include +#ifdef MAKEFS +/* In the makefs case we only want struct disklabel */ +#include +#else #include #include #include #include +#endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mkfs_msdos.h" #define MAXU16 0xffff /* maximum unsigned 16-bit quantity */ #define BPN 4 /* bits per nibble */ #define NPB 2 /* nibbles per byte */ #define DOSMAGIC 0xaa55 /* DOS magic number */ #define MINBPS 512 /* minimum bytes per sector */ #define MAXSPC 128 /* maximum sectors per cluster */ #define MAXNFT 16 /* maximum number of FATs */ #define DEFBLK 4096 /* default block size */ #define DEFBLK16 2048 /* default block size FAT16 */ #define DEFRDE 512 /* default root directory entries */ #define RESFTE 2 /* reserved FAT entries */ #define MINCLS12 1U /* minimum FAT12 clusters */ #define MINCLS16 0xff5U /* minimum FAT16 clusters */ #define MINCLS32 0xfff5U /* minimum FAT32 clusters */ #define MAXCLS12 0xff4U /* maximum FAT12 clusters */ #define MAXCLS16 0xfff4U /* maximum FAT16 clusters */ #define MAXCLS32 0xffffff4U /* maximum FAT32 clusters */ #define mincls(fat) ((fat) == 12 ? MINCLS12 : \ (fat) == 16 ? MINCLS16 : \ MINCLS32) #define maxcls(fat) ((fat) == 12 ? MAXCLS12 : \ (fat) == 16 ? MAXCLS16 : \ MAXCLS32) #define mk1(p, x) \ (p) = (u_int8_t)(x) #define mk2(p, x) \ (p)[0] = (u_int8_t)(x), \ (p)[1] = (u_int8_t)((x) >> 010) #define mk4(p, x) \ (p)[0] = (u_int8_t)(x), \ (p)[1] = (u_int8_t)((x) >> 010), \ (p)[2] = (u_int8_t)((x) >> 020), \ (p)[3] = (u_int8_t)((x) >> 030) struct bs { u_int8_t bsJump[3]; /* bootstrap entry point */ u_int8_t bsOemName[8]; /* OEM name and version */ } __packed; struct bsbpb { u_int8_t bpbBytesPerSec[2]; /* bytes per sector */ u_int8_t bpbSecPerClust; /* sectors per cluster */ u_int8_t bpbResSectors[2]; /* reserved sectors */ u_int8_t bpbFATs; /* number of FATs */ u_int8_t bpbRootDirEnts[2]; /* root directory entries */ u_int8_t bpbSectors[2]; /* total sectors */ u_int8_t bpbMedia; /* media descriptor */ u_int8_t bpbFATsecs[2]; /* sectors per FAT */ u_int8_t bpbSecPerTrack[2]; /* sectors per track */ u_int8_t bpbHeads[2]; /* drive heads */ u_int8_t bpbHiddenSecs[4]; /* hidden sectors */ u_int8_t bpbHugeSectors[4]; /* big total sectors */ } __packed; struct bsxbpb { u_int8_t bpbBigFATsecs[4]; /* big sectors per FAT */ u_int8_t bpbExtFlags[2]; /* FAT control flags */ u_int8_t bpbFSVers[2]; /* file system version */ u_int8_t bpbRootClust[4]; /* root directory start cluster */ u_int8_t bpbFSInfo[2]; /* file system info sector */ u_int8_t bpbBackup[2]; /* backup boot sector */ u_int8_t bpbReserved[12]; /* reserved */ } __packed; struct bsx { u_int8_t exDriveNumber; /* drive number */ u_int8_t exReserved1; /* reserved */ u_int8_t exBootSignature; /* extended boot signature */ u_int8_t exVolumeID[4]; /* volume ID number */ u_int8_t exVolumeLabel[11]; /* volume label */ u_int8_t exFileSysType[8]; /* file system type */ } __packed; struct de { u_int8_t deName[11]; /* name and extension */ u_int8_t deAttributes; /* attributes */ u_int8_t rsvd[10]; /* reserved */ u_int8_t deMTime[2]; /* last-modified time */ u_int8_t deMDate[2]; /* last-modified date */ u_int8_t deStartCluster[2]; /* starting cluster */ u_int8_t deFileSize[4]; /* size */ } __packed; struct bpb { u_int bpbBytesPerSec; /* bytes per sector */ u_int bpbSecPerClust; /* sectors per cluster */ u_int bpbResSectors; /* reserved sectors */ u_int bpbFATs; /* 
number of FATs */ u_int bpbRootDirEnts; /* root directory entries */ u_int bpbSectors; /* total sectors */ u_int bpbMedia; /* media descriptor */ u_int bpbFATsecs; /* sectors per FAT */ u_int bpbSecPerTrack; /* sectors per track */ u_int bpbHeads; /* drive heads */ u_int bpbHiddenSecs; /* hidden sectors */ u_int bpbHugeSectors; /* big total sectors */ u_int bpbBigFATsecs; /* big sectors per FAT */ u_int bpbRootClust; /* root directory start cluster */ u_int bpbFSInfo; /* file system info sector */ u_int bpbBackup; /* backup boot sector */ }; #define BPBGAP 0, 0, 0, 0, 0, 0 static struct { const char *name; struct bpb bpb; } const stdfmt[] = { {"160", {512, 1, 1, 2, 64, 320, 0xfe, 1, 8, 1, BPBGAP}}, {"180", {512, 1, 1, 2, 64, 360, 0xfc, 2, 9, 1, BPBGAP}}, {"320", {512, 2, 1, 2, 112, 640, 0xff, 1, 8, 2, BPBGAP}}, {"360", {512, 2, 1, 2, 112, 720, 0xfd, 2, 9, 2, BPBGAP}}, {"640", {512, 2, 1, 2, 112, 1280, 0xfb, 2, 8, 2, BPBGAP}}, {"720", {512, 2, 1, 2, 112, 1440, 0xf9, 3, 9, 2, BPBGAP}}, {"1200", {512, 1, 1, 2, 224, 2400, 0xf9, 7, 15, 2, BPBGAP}}, {"1232", {1024,1, 1, 2, 192, 1232, 0xfe, 2, 8, 2, BPBGAP}}, {"1440", {512, 1, 1, 2, 224, 2880, 0xf0, 9, 18, 2, BPBGAP}}, {"2880", {512, 2, 1, 2, 240, 5760, 0xf0, 9, 36, 2, BPBGAP}} }; static const u_int8_t bootcode[] = { 0xfa, /* cli */ 0x31, 0xc0, /* xor ax,ax */ 0x8e, 0xd0, /* mov ss,ax */ 0xbc, 0x00, 0x7c, /* mov sp,7c00h */ 0xfb, /* sti */ 0x8e, 0xd8, /* mov ds,ax */ 0xe8, 0x00, 0x00, /* call $ + 3 */ 0x5e, /* pop si */ 0x83, 0xc6, 0x19, /* add si,+19h */ 0xbb, 0x07, 0x00, /* mov bx,0007h */ 0xfc, /* cld */ 0xac, /* lodsb */ 0x84, 0xc0, /* test al,al */ 0x74, 0x06, /* jz $ + 8 */ 0xb4, 0x0e, /* mov ah,0eh */ 0xcd, 0x10, /* int 10h */ 0xeb, 0xf5, /* jmp $ - 9 */ 0x30, 0xe4, /* xor ah,ah */ 0xcd, 0x16, /* int 16h */ 0xcd, 0x19, /* int 19h */ 0x0d, 0x0a, 'N', 'o', 'n', '-', 's', 'y', 's', 't', 'e', 'm', ' ', 'd', 'i', 's', 'k', 0x0d, 0x0a, 'P', 'r', 'e', 's', 's', ' ', 'a', 'n', 'y', ' ', 'k', 'e', 'y', ' ', 't', 'o', ' ', 'r', 'e', 'b', 'o', 'o', 't', 0x0d, 0x0a, 0 }; static volatile sig_atomic_t got_siginfo; static void infohandler(int); static int check_mounted(const char *, mode_t); static int getstdfmt(const char *, struct bpb *); static int getdiskinfo(int, const char *, const char *, int, struct bpb *); static void print_bpb(struct bpb *); static int ckgeom(const char *, u_int, const char *); static void mklabel(u_int8_t *, const char *); static int oklabel(const char *); static void setstr(u_int8_t *, const char *, size_t); int mkfs_msdos(const char *fname, const char *dtype, const struct msdos_options *op) { char buf[MAXPATHLEN]; struct sigaction si_sa; struct stat sb; struct timeval tv; struct bpb bpb; struct tm *tm; struct bs *bs; struct bsbpb *bsbpb; struct bsxbpb *bsxbpb; struct bsx *bsx; struct de *de; u_int8_t *img; const char *bname; ssize_t n; time_t now; u_int fat, bss, rds, cls, dir, lsn, x, x1, x2; u_int extra_res, alignment, saved_x, attempts=0; bool set_res, set_spf, set_spc; int fd, fd1, rv; struct msdos_options o = *op; img = NULL; rv = -1; fd = fd1 = -1; if (o.block_size && o.sectors_per_cluster) { warnx("Cannot specify both block size and sectors per cluster"); goto done; } if (o.OEM_string && strlen(o.OEM_string) > 8) { warnx("%s: bad OEM string", o.OEM_string); goto done; } if (o.create_size) { if (o.no_create) { warnx("create (-C) is incompatible with -N"); goto done; } fd = open(fname, O_RDWR | O_CREAT | O_TRUNC, 0644); if (fd == -1) { warnx("failed to create %s", fname); goto done; } if (ftruncate(fd, o.create_size)) { 
warnx("failed to initialize %jd bytes", (intmax_t)o.create_size); goto done; } } else if ((fd = open(fname, o.no_create ? O_RDONLY : O_RDWR)) == -1) { warn("%s", fname); goto done; } if (fstat(fd, &sb)) { warn("%s", fname); goto done; } if (o.create_size) { if (!S_ISREG(sb.st_mode)) warnx("warning, %s is not a regular file", fname); } else { -#ifndef MAKEFS +#ifdef MAKEFS + errx(1, "o.create_size must be set!"); +#else if (!S_ISCHR(sb.st_mode)) warnx("warning, %s is not a character device", fname); #endif } +#ifndef MAKEFS if (!o.no_create) if (check_mounted(fname, sb.st_mode) == -1) goto done; +#endif if (o.offset && o.offset != lseek(fd, o.offset, SEEK_SET)) { warnx("cannot seek to %jd", (intmax_t)o.offset); goto done; } memset(&bpb, 0, sizeof(bpb)); if (o.floppy) { if (getstdfmt(o.floppy, &bpb) == -1) goto done; bpb.bpbHugeSectors = bpb.bpbSectors; bpb.bpbSectors = 0; bpb.bpbBigFATsecs = bpb.bpbFATsecs; bpb.bpbFATsecs = 0; } if (o.drive_heads) bpb.bpbHeads = o.drive_heads; if (o.sectors_per_track) bpb.bpbSecPerTrack = o.sectors_per_track; if (o.bytes_per_sector) bpb.bpbBytesPerSec = o.bytes_per_sector; if (o.size) bpb.bpbHugeSectors = o.size; if (o.hidden_sectors_set) bpb.bpbHiddenSecs = o.hidden_sectors; if (!(o.floppy || (o.drive_heads && o.sectors_per_track && o.bytes_per_sector && o.size && o.hidden_sectors_set))) { if (getdiskinfo(fd, fname, dtype, o.hidden_sectors_set, &bpb) == -1) goto done; bpb.bpbHugeSectors -= (o.offset / bpb.bpbBytesPerSec); if (bpb.bpbSecPerClust == 0) { /* set defaults */ if (bpb.bpbHugeSectors <= 6000) /* about 3MB -> 512 bytes */ bpb.bpbSecPerClust = 1; else if (bpb.bpbHugeSectors <= (1<<17)) /* 64M -> 4k */ bpb.bpbSecPerClust = 8; else if (bpb.bpbHugeSectors <= (1<<19)) /* 256M -> 8k */ bpb.bpbSecPerClust = 16; else if (bpb.bpbHugeSectors <= (1<<21)) /* 1G -> 16k */ bpb.bpbSecPerClust = 32; else bpb.bpbSecPerClust = 64; /* otherwise 32k */ } } if (!powerof2(bpb.bpbBytesPerSec)) { warnx("bytes/sector (%u) is not a power of 2", bpb.bpbBytesPerSec); goto done; } if (bpb.bpbBytesPerSec < MINBPS) { warnx("bytes/sector (%u) is too small; minimum is %u", bpb.bpbBytesPerSec, MINBPS); goto done; } if (o.volume_label && !oklabel(o.volume_label)) { warnx("%s: bad volume label", o.volume_label); goto done; } if (!(fat = o.fat_type)) { if (o.floppy) fat = 12; else if (!o.directory_entries && (o.info_sector || o.backup_sector)) fat = 32; } if ((fat == 32 && o.directory_entries) || (fat != 32 && (o.info_sector || o.backup_sector))) { warnx("-%c is not a legal FAT%s option", fat == 32 ? 'e' : o.info_sector ? 'i' : 'k', fat == 32 ? 
"32" : "12/16"); goto done; } if (o.floppy && fat == 32) bpb.bpbRootDirEnts = 0; if (fat != 0 && fat != 12 && fat != 16 && fat != 32) { warnx("%d: bad FAT type", fat); goto done; } if (o.block_size) { if (!powerof2(o.block_size)) { warnx("block size (%u) is not a power of 2", o.block_size); goto done; } if (o.block_size < bpb.bpbBytesPerSec) { warnx("block size (%u) is too small; minimum is %u", o.block_size, bpb.bpbBytesPerSec); goto done; } if (o.block_size > bpb.bpbBytesPerSec * MAXSPC) { warnx("block size (%u) is too large; maximum is %u", o.block_size, bpb.bpbBytesPerSec * MAXSPC); goto done; } bpb.bpbSecPerClust = o.block_size / bpb.bpbBytesPerSec; } if (o.sectors_per_cluster) { if (!powerof2(o.sectors_per_cluster)) { warnx("sectors/cluster (%u) is not a power of 2", o.sectors_per_cluster); goto done; } bpb.bpbSecPerClust = o.sectors_per_cluster; } if (o.reserved_sectors) bpb.bpbResSectors = o.reserved_sectors; if (o.num_FAT) { if (o.num_FAT > MAXNFT) { warnx("number of FATs (%u) is too large; maximum is %u", o.num_FAT, MAXNFT); goto done; } bpb.bpbFATs = o.num_FAT; } if (o.directory_entries) bpb.bpbRootDirEnts = o.directory_entries; if (o.media_descriptor_set) { if (o.media_descriptor < 0xf0) { warnx("illegal media descriptor (%#x)", o.media_descriptor); goto done; } bpb.bpbMedia = o.media_descriptor; } if (o.sectors_per_fat) bpb.bpbBigFATsecs = o.sectors_per_fat; if (o.info_sector) bpb.bpbFSInfo = o.info_sector; if (o.backup_sector) bpb.bpbBackup = o.backup_sector; bss = 1; bname = NULL; fd1 = -1; if (o.bootstrap) { bname = o.bootstrap; if (!strchr(bname, '/')) { snprintf(buf, sizeof(buf), "/boot/%s", bname); bname = buf; } if ((fd1 = open(bname, O_RDONLY)) == -1 || fstat(fd1, &sb)) { warn("%s", bname); goto done; } if (!S_ISREG(sb.st_mode) || sb.st_size % bpb.bpbBytesPerSec || sb.st_size < bpb.bpbBytesPerSec || sb.st_size > bpb.bpbBytesPerSec * MAXU16) { warnx("%s: inappropriate file type or format", bname); goto done; } bss = sb.st_size / bpb.bpbBytesPerSec; } if (!bpb.bpbFATs) bpb.bpbFATs = 2; if (!fat) { if (bpb.bpbHugeSectors < (bpb.bpbResSectors ? bpb.bpbResSectors : bss) + howmany((RESFTE + (bpb.bpbSecPerClust ? MINCLS16 : MAXCLS12 + 1)) * (bpb.bpbSecPerClust ? 16 : 12) / BPN, bpb.bpbBytesPerSec * NPB) * bpb.bpbFATs + howmany(bpb.bpbRootDirEnts ? bpb.bpbRootDirEnts : DEFRDE, bpb.bpbBytesPerSec / sizeof(struct de)) + (bpb.bpbSecPerClust ? MINCLS16 : MAXCLS12 + 1) * (bpb.bpbSecPerClust ? bpb.bpbSecPerClust : howmany(DEFBLK, bpb.bpbBytesPerSec))) fat = 12; else if (bpb.bpbRootDirEnts || bpb.bpbHugeSectors < (bpb.bpbResSectors ? bpb.bpbResSectors : bss) + howmany((RESFTE + MAXCLS16) * 2, bpb.bpbBytesPerSec) * bpb.bpbFATs + howmany(DEFRDE, bpb.bpbBytesPerSec / sizeof(struct de)) + (MAXCLS16 + 1) * (bpb.bpbSecPerClust ? 
bpb.bpbSecPerClust : howmany(8192, bpb.bpbBytesPerSec))) fat = 16; else fat = 32; } x = bss; if (fat == 32) { if (!bpb.bpbFSInfo) { if (x == MAXU16 || x == bpb.bpbBackup) { warnx("no room for info sector"); goto done; } bpb.bpbFSInfo = x; } if (bpb.bpbFSInfo != MAXU16 && x <= bpb.bpbFSInfo) x = bpb.bpbFSInfo + 1; if (!bpb.bpbBackup) { if (x == MAXU16) { warnx("no room for backup sector"); goto done; } bpb.bpbBackup = x; } else if (bpb.bpbBackup != MAXU16 && bpb.bpbBackup == bpb.bpbFSInfo) { warnx("backup sector would overwrite info sector"); goto done; } if (bpb.bpbBackup != MAXU16 && x <= bpb.bpbBackup) x = bpb.bpbBackup + 1; } extra_res = 0; alignment = 0; set_res = (bpb.bpbResSectors == 0); set_spf = (bpb.bpbBigFATsecs == 0); set_spc = (bpb.bpbSecPerClust == 0); saved_x = x; /* * Attempt to align the root directory to cluster if o.align is set. * This is done by padding with reserved blocks. Note that this can * cause other factors to change, which can in turn change the alignment. * This should take at most 2 iterations, as increasing the reserved * amount may cause the FAT size to decrease by 1, requiring another * bpbFATs reserved blocks. If bpbSecPerClust changes, it will * be half of its previous size, and thus will not throw off alignment. */ do { x = saved_x; if (set_res) bpb.bpbResSectors = ((fat == 32) ? MAX(x, MAX(16384 / bpb.bpbBytesPerSec, 4)) : x) + extra_res; else if (bpb.bpbResSectors < x) { warnx("too few reserved sectors (need %d have %d)", x, bpb.bpbResSectors); goto done; } if (fat != 32 && !bpb.bpbRootDirEnts) bpb.bpbRootDirEnts = DEFRDE; rds = howmany(bpb.bpbRootDirEnts, bpb.bpbBytesPerSec / sizeof(struct de)); if (set_spc) { for (bpb.bpbSecPerClust = howmany(fat == 16 ? DEFBLK16 : DEFBLK, bpb.bpbBytesPerSec); bpb.bpbSecPerClust < MAXSPC && (bpb.bpbResSectors + howmany((RESFTE + maxcls(fat)) * (fat / BPN), bpb.bpbBytesPerSec * NPB) * bpb.bpbFATs + rds + (u_int64_t) (maxcls(fat) + 1) * bpb.bpbSecPerClust) <= bpb.bpbHugeSectors; bpb.bpbSecPerClust <<= 1) continue; } if (fat != 32 && bpb.bpbBigFATsecs > MAXU16) { warnx("too many sectors/FAT for FAT12/16"); goto done; } x1 = bpb.bpbResSectors + rds; x = bpb.bpbBigFATsecs ? 
bpb.bpbBigFATsecs : 1; if (x1 + (u_int64_t)x * bpb.bpbFATs > bpb.bpbHugeSectors) { warnx("meta data exceeds file system size"); goto done; } x1 += x * bpb.bpbFATs; x = (u_int64_t)(bpb.bpbHugeSectors - x1) * bpb.bpbBytesPerSec * NPB / (bpb.bpbSecPerClust * bpb.bpbBytesPerSec * NPB + fat / BPN * bpb.bpbFATs); x2 = howmany((RESFTE + MIN(x, maxcls(fat))) * (fat / BPN), bpb.bpbBytesPerSec * NPB); if (set_spf) { if (bpb.bpbBigFATsecs == 0) bpb.bpbBigFATsecs = x2; x1 += (bpb.bpbBigFATsecs - 1) * bpb.bpbFATs; } if (set_res) { /* attempt to align root directory */ alignment = (bpb.bpbResSectors + bpb.bpbBigFATsecs * bpb.bpbFATs) % bpb.bpbSecPerClust; if (o.align) extra_res += bpb.bpbSecPerClust - alignment; } attempts++; } while (o.align && alignment != 0 && attempts < 2); if (o.align && alignment != 0) warnx("warning: Alignment failed."); cls = (bpb.bpbHugeSectors - x1) / bpb.bpbSecPerClust; x = (u_int64_t)bpb.bpbBigFATsecs * bpb.bpbBytesPerSec * NPB / (fat / BPN) - RESFTE; if (cls > x) cls = x; if (bpb.bpbBigFATsecs < x2) warnx("warning: sectors/FAT limits file system to %u clusters", cls); if (cls < mincls(fat)) { warnx("%u clusters too few clusters for FAT%u, need %u", cls, fat, mincls(fat)); goto done; } if (cls > maxcls(fat)) { cls = maxcls(fat); bpb.bpbHugeSectors = x1 + (cls + 1) * bpb.bpbSecPerClust - 1; warnx("warning: FAT type limits file system to %u sectors", bpb.bpbHugeSectors); } printf("%s: %u sector%s in %u FAT%u cluster%s " "(%u bytes/cluster)\n", fname, cls * bpb.bpbSecPerClust, cls * bpb.bpbSecPerClust == 1 ? "" : "s", cls, fat, cls == 1 ? "" : "s", bpb.bpbBytesPerSec * bpb.bpbSecPerClust); if (!bpb.bpbMedia) bpb.bpbMedia = !bpb.bpbHiddenSecs ? 0xf0 : 0xf8; if (fat == 32) bpb.bpbRootClust = RESFTE; if (bpb.bpbHugeSectors <= MAXU16) { bpb.bpbSectors = bpb.bpbHugeSectors; bpb.bpbHugeSectors = 0; } if (fat != 32) { bpb.bpbFATsecs = bpb.bpbBigFATsecs; bpb.bpbBigFATsecs = 0; } print_bpb(&bpb); if (!o.no_create) { if (o.timestamp_set) { tv.tv_sec = now = o.timestamp; tv.tv_usec = 0; tm = gmtime(&now); } else { gettimeofday(&tv, NULL); now = tv.tv_sec; tm = localtime(&now); } if (!(img = malloc(bpb.bpbBytesPerSec))) { warn(NULL); goto done; } dir = bpb.bpbResSectors + (bpb.bpbFATsecs ? bpb.bpbFATsecs : bpb.bpbBigFATsecs) * bpb.bpbFATs; memset(&si_sa, 0, sizeof(si_sa)); si_sa.sa_handler = infohandler; +#ifdef SIGINFO if (sigaction(SIGINFO, &si_sa, NULL) == -1) { warn("sigaction SIGINFO"); goto done; } +#endif for (lsn = 0; lsn < dir + (fat == 32 ? bpb.bpbSecPerClust : rds); lsn++) { if (got_siginfo) { fprintf(stderr,"%s: writing sector %u of %u (%u%%)\n", fname, lsn, (dir + (fat == 32 ? bpb.bpbSecPerClust: rds)), (lsn * 100) / (dir + (fat == 32 ? 
bpb.bpbSecPerClust: rds))); got_siginfo = 0; } x = lsn; if (o.bootstrap && fat == 32 && bpb.bpbBackup != MAXU16 && bss <= bpb.bpbBackup && x >= bpb.bpbBackup) { x -= bpb.bpbBackup; if (!x && lseek(fd1, o.offset, SEEK_SET)) { warn("%s", bname); goto done; } } if (o.bootstrap && x < bss) { if ((n = read(fd1, img, bpb.bpbBytesPerSec)) == -1) { warn("%s", bname); goto done; } if ((unsigned)n != bpb.bpbBytesPerSec) { warnx("%s: can't read sector %u", bname, x); goto done; } } else memset(img, 0, bpb.bpbBytesPerSec); if (!lsn || (fat == 32 && bpb.bpbBackup != MAXU16 && lsn == bpb.bpbBackup)) { x1 = sizeof(struct bs); bsbpb = (struct bsbpb *)(img + x1); mk2(bsbpb->bpbBytesPerSec, bpb.bpbBytesPerSec); mk1(bsbpb->bpbSecPerClust, bpb.bpbSecPerClust); mk2(bsbpb->bpbResSectors, bpb.bpbResSectors); mk1(bsbpb->bpbFATs, bpb.bpbFATs); mk2(bsbpb->bpbRootDirEnts, bpb.bpbRootDirEnts); mk2(bsbpb->bpbSectors, bpb.bpbSectors); mk1(bsbpb->bpbMedia, bpb.bpbMedia); mk2(bsbpb->bpbFATsecs, bpb.bpbFATsecs); mk2(bsbpb->bpbSecPerTrack, bpb.bpbSecPerTrack); mk2(bsbpb->bpbHeads, bpb.bpbHeads); mk4(bsbpb->bpbHiddenSecs, bpb.bpbHiddenSecs); mk4(bsbpb->bpbHugeSectors, bpb.bpbHugeSectors); x1 += sizeof(struct bsbpb); if (fat == 32) { bsxbpb = (struct bsxbpb *)(img + x1); mk4(bsxbpb->bpbBigFATsecs, bpb.bpbBigFATsecs); mk2(bsxbpb->bpbExtFlags, 0); mk2(bsxbpb->bpbFSVers, 0); mk4(bsxbpb->bpbRootClust, bpb.bpbRootClust); mk2(bsxbpb->bpbFSInfo, bpb.bpbFSInfo); mk2(bsxbpb->bpbBackup, bpb.bpbBackup); x1 += sizeof(struct bsxbpb); } bsx = (struct bsx *)(img + x1); mk1(bsx->exBootSignature, 0x29); if (o.volume_id_set) x = o.volume_id; else x = (((u_int)(1 + tm->tm_mon) << 8 | (u_int)tm->tm_mday) + ((u_int)tm->tm_sec << 8 | (u_int)(tv.tv_usec / 10))) << 16 | ((u_int)(1900 + tm->tm_year) + ((u_int)tm->tm_hour << 8 | (u_int)tm->tm_min)); mk4(bsx->exVolumeID, x); mklabel(bsx->exVolumeLabel, o.volume_label ? o.volume_label : "NO NAME"); snprintf(buf, sizeof(buf), "FAT%u", fat); setstr(bsx->exFileSysType, buf, sizeof(bsx->exFileSysType)); if (!o.bootstrap) { x1 += sizeof(struct bsx); bs = (struct bs *)img; mk1(bs->bsJump[0], 0xeb); mk1(bs->bsJump[1], x1 - 2); mk1(bs->bsJump[2], 0x90); setstr(bs->bsOemName, o.OEM_string ? o.OEM_string : "BSD4.4 ", sizeof(bs->bsOemName)); memcpy(img + x1, bootcode, sizeof(bootcode)); mk2(img + MINBPS - 2, DOSMAGIC); } } else if (fat == 32 && bpb.bpbFSInfo != MAXU16 && (lsn == bpb.bpbFSInfo || (bpb.bpbBackup != MAXU16 && lsn == bpb.bpbBackup + bpb.bpbFSInfo))) { mk4(img, 0x41615252); mk4(img + MINBPS - 28, 0x61417272); mk4(img + MINBPS - 24, 0xffffffff); mk4(img + MINBPS - 20, 0xffffffff); mk2(img + MINBPS - 2, DOSMAGIC); } else if (lsn >= bpb.bpbResSectors && lsn < dir && !((lsn - bpb.bpbResSectors) % (bpb.bpbFATsecs ? bpb.bpbFATsecs : bpb.bpbBigFATsecs))) { mk1(img[0], bpb.bpbMedia); for (x = 1; x < fat * (fat == 32 ? 3 : 2) / 8; x++) mk1(img[x], fat == 32 && x % 4 == 3 ? 
0x0f : 0xff); } else if (lsn == dir && o.volume_label) { de = (struct de *)img; mklabel(de->deName, o.volume_label); mk1(de->deAttributes, 050); x = (u_int)tm->tm_hour << 11 | (u_int)tm->tm_min << 5 | (u_int)tm->tm_sec >> 1; mk2(de->deMTime, x); x = (u_int)(tm->tm_year - 80) << 9 | (u_int)(tm->tm_mon + 1) << 5 | (u_int)tm->tm_mday; mk2(de->deMDate, x); } if ((n = write(fd, img, bpb.bpbBytesPerSec)) == -1) { warn("%s", fname); goto done; } if ((unsigned)n != bpb.bpbBytesPerSec) { warnx("%s: can't write sector %u", fname, lsn); goto done; } } } rv = 0; done: free(img); if (fd != -1) close(fd); if (fd1 != -1) close(fd1); return rv; } /* * return -1 with error if file system is mounted. */ static int check_mounted(const char *fname, mode_t mode) { +/* + * If getmntinfo() is not available (e.g. Linux) don't check. This should + * not be a problem since we will only be using makefs to create images. + */ +#if !defined(MAKEFS) struct statfs *mp; const char *s1, *s2; size_t len; int n, r; if (!(n = getmntinfo(&mp, MNT_NOWAIT))) { warn("getmntinfo"); return -1; } len = strlen(_PATH_DEV); s1 = fname; if (!strncmp(s1, _PATH_DEV, len)) s1 += len; r = S_ISCHR(mode) && s1 != fname && *s1 == 'r'; for (; n--; mp++) { s2 = mp->f_mntfromname; if (!strncmp(s2, _PATH_DEV, len)) s2 += len; if ((r && s2 != mp->f_mntfromname && !strcmp(s1 + 1, s2)) || !strcmp(s1, s2)) { warnx("%s is mounted on %s", fname, mp->f_mntonname); return -1; } } +#endif return 0; } /* * Get a standard format. */ static int getstdfmt(const char *fmt, struct bpb *bpb) { u_int x, i; x = nitems(stdfmt); for (i = 0; i < x && strcmp(fmt, stdfmt[i].name); i++); if (i == x) { warnx("%s: unknown standard format", fmt); return -1; } *bpb = stdfmt[i].bpb; return 0; } +static void +compute_geometry_from_file(int fd, const char *fname, struct disklabel *lp) +{ + struct stat st; + off_t ms; + + if (fstat(fd, &st)) + err(1, "cannot get disk size"); + if (!S_ISREG(st.st_mode)) + errx(1, "%s is not a regular file", fname); + ms = st.st_size; + lp->d_secsize = 512; + lp->d_nsectors = 63; + lp->d_ntracks = 255; + lp->d_secperunit = ms / lp->d_secsize; +} + /* * Get disk slice, partition, and geometry information. 
*/ static int getdiskinfo(int fd, const char *fname, const char *dtype, __unused int oflag, struct bpb *bpb) { struct disklabel *lp, dlp; + off_t hs = 0; +#ifndef MAKEFS + off_t ms; struct fd_type type; - off_t ms, hs = 0; lp = NULL; /* If the user specified a disk type, try to use that */ if (dtype != NULL) { lp = getdiskbyname(dtype); } /* Maybe it's a floppy drive */ if (lp == NULL) { if (ioctl(fd, DIOCGMEDIASIZE, &ms) == -1) { - struct stat st; - - if (fstat(fd, &st)) - err(1, "cannot get disk size"); /* create a fake geometry for a file image */ - ms = st.st_size; - dlp.d_secsize = 512; - dlp.d_nsectors = 63; - dlp.d_ntracks = 255; - dlp.d_secperunit = ms / dlp.d_secsize; + compute_geometry_from_file(fd, fname, &dlp); lp = &dlp; } else if (ioctl(fd, FD_GTYPE, &type) != -1) { dlp.d_secsize = 128 << type.secsize; dlp.d_nsectors = type.sectrac; dlp.d_ntracks = type.heads; dlp.d_secperunit = ms / dlp.d_secsize; lp = &dlp; } } /* Maybe it's a fixed drive */ if (lp == NULL) { if (bpb->bpbBytesPerSec) dlp.d_secsize = bpb->bpbBytesPerSec; if (bpb->bpbBytesPerSec == 0 && ioctl(fd, DIOCGSECTORSIZE, &dlp.d_secsize) == -1) err(1, "cannot get sector size"); dlp.d_secperunit = ms / dlp.d_secsize; if (bpb->bpbSecPerTrack == 0 && ioctl(fd, DIOCGFWSECTORS, &dlp.d_nsectors) == -1) { warn("cannot get number of sectors per track"); dlp.d_nsectors = 63; } if (bpb->bpbHeads == 0 && ioctl(fd, DIOCGFWHEADS, &dlp.d_ntracks) == -1) { warn("cannot get number of heads"); if (dlp.d_secperunit <= 63*1*1024) dlp.d_ntracks = 1; else if (dlp.d_secperunit <= 63*16*1024) dlp.d_ntracks = 16; else dlp.d_ntracks = 255; } hs = (ms / dlp.d_secsize) - dlp.d_secperunit; lp = &dlp; } +#else + /* In the makefs case we only support image files: */ + compute_geometry_from_file(fd, fname, &dlp); + lp = &dlp; +#endif if (bpb->bpbBytesPerSec == 0) { if (ckgeom(fname, lp->d_secsize, "bytes/sector") == -1) return -1; bpb->bpbBytesPerSec = lp->d_secsize; } if (bpb->bpbSecPerTrack == 0) { if (ckgeom(fname, lp->d_nsectors, "sectors/track") == -1) return -1; bpb->bpbSecPerTrack = lp->d_nsectors; } if (bpb->bpbHeads == 0) { if (ckgeom(fname, lp->d_ntracks, "drive heads") == -1) return -1; bpb->bpbHeads = lp->d_ntracks; } if (bpb->bpbHugeSectors == 0) bpb->bpbHugeSectors = lp->d_secperunit; if (bpb->bpbHiddenSecs == 0) bpb->bpbHiddenSecs = hs; return 0; } /* * Print out BPB values. */ static void print_bpb(struct bpb *bpb) { printf("BytesPerSec=%u SecPerClust=%u ResSectors=%u FATs=%u", bpb->bpbBytesPerSec, bpb->bpbSecPerClust, bpb->bpbResSectors, bpb->bpbFATs); if (bpb->bpbRootDirEnts) printf(" RootDirEnts=%u", bpb->bpbRootDirEnts); if (bpb->bpbSectors) printf(" Sectors=%u", bpb->bpbSectors); printf(" Media=%#x", bpb->bpbMedia); if (bpb->bpbFATsecs) printf(" FATsecs=%u", bpb->bpbFATsecs); printf(" SecPerTrack=%u Heads=%u HiddenSecs=%u", bpb->bpbSecPerTrack, bpb->bpbHeads, bpb->bpbHiddenSecs); if (bpb->bpbHugeSectors) printf(" HugeSectors=%u", bpb->bpbHugeSectors); if (!bpb->bpbFATsecs) { printf(" FATsecs=%u RootCluster=%u", bpb->bpbBigFATsecs, bpb->bpbRootClust); printf(" FSInfo="); printf(bpb->bpbFSInfo == MAXU16 ? "%#x" : "%u", bpb->bpbFSInfo); printf(" Backup="); printf(bpb->bpbBackup == MAXU16 ? "%#x" : "%u", bpb->bpbBackup); } printf("\n"); } /* * Check a disk geometry value. */ static int ckgeom(const char *fname, u_int val, const char *msg) { if (!val) { warnx("%s: no default %s", fname, msg); return -1; } if (val > MAXU16) { warnx("%s: illegal %s %d", fname, msg, val); return -1; } return 0; } /* * Check a volume label. 
*/ static int oklabel(const char *src) { int c, i; for (i = 0; i <= 11; i++) { c = (u_char)*src++; if (c < ' ' + !i || strchr("\"*+,./:;<=>?[\\]|", c)) break; } return i && !c; } /* * Make a volume label. */ static void mklabel(u_int8_t *dest, const char *src) { int c, i; for (i = 0; i < 11; i++) { c = *src ? toupper(*src++) : ' '; *dest++ = !i && c == '\xe5' ? 5 : c; } } /* * Copy string, padding with spaces. */ static void setstr(u_int8_t *dest, const char *src, size_t len) { while (len--) *dest++ = *src ? *src++ : ' '; } static void infohandler(int sig __unused) { got_siginfo = 1; } Index: projects/clang1000-import/share/mk/bsd.compat.mk =================================================================== --- projects/clang1000-import/share/mk/bsd.compat.mk (revision 357178) +++ projects/clang1000-import/share/mk/bsd.compat.mk (revision 357179) @@ -1,173 +1,172 @@ # $FreeBSD$ .if !targets(__<${_this:T}>__) __<${_this:T}>__: .if defined(_LIBCOMPAT) COMPAT_ARCH= ${TARGET_ARCH} COMPAT_CPUTYPE= ${TARGET_CPUTYPE} .if (defined(WANT_COMPILER_TYPE) && ${WANT_COMPILER_TYPE} == gcc) || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) COMPAT_COMPILER_TYPE= gcc .else COMPAT_COMPILER_TYPE= clang .endif .else COMPAT_ARCH= ${MACHINE_ARCH} COMPAT_CPUTYPE= ${CPUTYPE} .include COMPAT_COMPILER_TYPE=${COMPILER_TYPE} .endif # ------------------------------------------------------------------- # 32 bit world .if ${COMPAT_ARCH} == "amd64" HAS_COMPAT=32 .if empty(COMPAT_CPUTYPE) LIB32CPUFLAGS= -march=i686 -mmmx -msse -msse2 .else LIB32CPUFLAGS= -march=${COMPAT_CPUTYPE} .endif .if ${COMPAT_COMPILER_TYPE} == gcc .else LIB32CPUFLAGS+= -target x86_64-unknown-freebsd13.0 .endif LIB32CPUFLAGS+= -m32 LIB32_MACHINE= i386 LIB32_MACHINE_ARCH= i386 LIB32WMAKEENV= MACHINE_CPU="i686 mmx sse sse2" LIB32WMAKEFLAGS= \ AS="${XAS} --32" \ LD="${XLD} -m elf_i386_fbsd -L${LIBCOMPATTMP}/usr/lib32" .elif ${COMPAT_ARCH} == "powerpc64" HAS_COMPAT=32 .if empty(COMPAT_CPUTYPE) LIB32CPUFLAGS= -mcpu=powerpc .else LIB32CPUFLAGS= -mcpu=${COMPAT_CPUTYPE} .endif .if ${COMPAT_COMPILER_TYPE} == "gcc" LIB32CPUFLAGS+= -m32 .else LIB32CPUFLAGS+= -target powerpc-unknown-freebsd13.0 # Use BFD to workaround ld.lld issues on PowerPC 32 bit LIB32CPUFLAGS+= -fuse-ld=${LD_BFD} .endif LIB32_MACHINE= powerpc LIB32_MACHINE_ARCH= powerpc LIB32WMAKEFLAGS= \ LD="${LD_BFD} -m elf32ppc_fbsd" .elif ${COMPAT_ARCH:Mmips64*} != "" HAS_COMPAT=32 .if ${COMPAT_COMPILER_TYPE} == gcc .if empty(COMPAT_CPUTYPE) LIB32CPUFLAGS= -march=mips3 .else LIB32CPUFLAGS= -march=${COMPAT_CPUTYPE} .endif .else .if ${COMPAT_ARCH:Mmips64el*} != "" LIB32CPUFLAGS= -target mipsel-unknown-freebsd13.0 .else LIB32CPUFLAGS= -target mips-unknown-freebsd13.0 .endif .endif LIB32CPUFLAGS+= -mabi=32 LIB32_MACHINE= mips +LIB32_MACHINE_ARCH:= ${COMPAT_ARCH:S/64//} .if ${COMPAT_ARCH:Mmips64el*} != "" -LIB32_MACHINE_ARCH= mipsel _EMULATION= elf32ltsmip_fbsd .else -LIB32_MACHINE_ARCH= mips _EMULATION= elf32btsmip_fbsd .endif LIB32WMAKEFLAGS= LD="${XLD} -m ${_EMULATION}" LIB32LDFLAGS= -Wl,-m${_EMULATION} .endif LIB32WMAKEFLAGS+= NM="${XNM}" LIB32WMAKEFLAGS+= OBJCOPY="${XOBJCOPY}" LIB32CFLAGS= -DCOMPAT_32BIT LIB32DTRACE= ${DTRACE} -32 LIB32WMAKEFLAGS+= -DCOMPAT_32BIT # ------------------------------------------------------------------- # soft-fp world .if ${COMPAT_ARCH:Marmv[67]*} != "" HAS_COMPAT=SOFT LIBSOFTCFLAGS= -DCOMPAT_SOFTFP LIBSOFTCPUFLAGS= -mfloat-abi=softfp LIBSOFT_MACHINE= arm LIBSOFT_MACHINE_ARCH= ${COMPAT_ARCH} LIBSOFTWMAKEENV= CPUTYPE=soft LIBSOFTWMAKEFLAGS= -DCOMPAT_SOFTFP .endif # 
------------------------------------------------------------------- # In the program linking case, select LIBCOMPAT .if defined(NEED_COMPAT) .ifndef HAS_COMPAT .warning NEED_COMPAT defined, but no LIBCOMPAT is available (COMPAT_ARCH == ${COMPAT_ARCH} .elif !${HAS_COMPAT:M${NEED_COMPAT}} && ${NEED_COMPAT} != "any" .error NEED_COMPAT (${NEED_COMPAT}) defined, but not in HAS_COMPAT ($HAS_COMPAT) .elif ${NEED_COMPAT} == "any" .endif .ifdef WANT_COMPAT .error Both WANT_COMPAT and NEED_COMPAT defined .endif WANT_COMPAT:= ${NEED_COMPAT} .endif .if defined(HAS_COMPAT) && defined(WANT_COMPAT) .if ${WANT_COMPAT} == "any" _LIBCOMPAT:= ${HAS_COMPAT:[1]} .else _LIBCOMPAT:= ${WANT_COMPAT} .endif .endif # ------------------------------------------------------------------- # Generic code for each type. # Set defaults based on type. libcompat= ${_LIBCOMPAT:tl} _LIBCOMPAT_MAKEVARS= _OBJTOP TMP CPUFLAGS CFLAGS CXXFLAGS LDFLAGS \ _MACHINE _MACHINE_ARCH WMAKEENV WMAKEFLAGS WMAKE .for _var in ${_LIBCOMPAT_MAKEVARS} .if !empty(LIB${_LIBCOMPAT}${_var}) LIBCOMPAT${_var}?= ${LIB${_LIBCOMPAT}${_var}} .endif .endfor # Shared flags LIBCOMPAT_OBJTOP?= ${OBJTOP}/obj-lib${libcompat} LIBCOMPATTMP?= ${LIBCOMPAT_OBJTOP}/tmp LIBCOMPATCFLAGS+= ${LIBCOMPATCPUFLAGS} \ -L${LIBCOMPATTMP}/usr/lib${libcompat} \ --sysroot=${LIBCOMPATTMP} \ ${BFLAGS} LIBCOMPATWMAKEENV+= MACHINE=${LIBCOMPAT_MACHINE} LIBCOMPATWMAKEENV+= MACHINE_ARCH=${LIBCOMPAT_MACHINE_ARCH} # -B is needed to find /usr/lib32/crti.o for GCC and /usr/libsoft/crti.o for # Clang/GCC. LIBCOMPATCFLAGS+= -B${LIBCOMPATTMP}/usr/lib${libcompat} .if defined(WANT_COMPAT) LIBDIR_BASE:= /usr/lib${libcompat} _LIB_OBJTOP= ${LIBCOMPAT_OBJTOP} CFLAGS+= ${LIBCOMPATCFLAGS} LDFLAGS+= ${CFLAGS} ${LIBCOMPATLDFLAGS} MACHINE= ${LIBCOMPAT_MACHINE} MACHINE_ARCH= ${LIBCOMPAT_MACHINE_ARCH} .endif .endif Index: projects/clang1000-import/sys/conf/Makefile.mips =================================================================== --- projects/clang1000-import/sys/conf/Makefile.mips (revision 357178) +++ projects/clang1000-import/sys/conf/Makefile.mips (revision 357179) @@ -1,111 +1,108 @@ # Makefile.mips # $FreeBSD$ # # Makefile for FreeBSD # # This makefile is constructed from a machine description: # config machineid # Most changes should be made in the machine description # /sys/mips/conf/``machineid'' # after which you should do # config machineid # Generic makefile changes should be made in # /sys/conf/Makefile.mips # after which config should be rerun for all machines. # # Which version of config(8) is required. %VERSREQ= 600012 STD8X16FONT?= iso .if !defined(S) .if exists(./@/.) S= ./@ .else S= ../../.. .endif .endif .include "$S/conf/kern.pre.mk" INCLUDES+= -I$S/contrib/libfdt LDSCRIPT_NAME?=ldscript.$M SYSTEM_LD:= ${SYSTEM_LD:$S/conf/${LDSCRIPT_NAME}=${LDSCRIPT_NAME}} SYSTEM_DEP:= ${SYSTEM_DEP:$S/conf/${LDSCRIPT_NAME}=${LDSCRIPT_NAME}} KERNLOADADDR?=0x80001000 # This obscure value is defined by CFE for WR160N # To be changed later TRAMPLOADADDR?=0x807963c0 # We default to the MIPS32 ISA for O32 and MIPS64 ISA for N64 and N32 # if none is specified in the kernel configuration file. 
.if ${MACHINE_ARCH:Mmips64*} != "" || ${MACHINE_ARCH:Mmipsn32*} != "" ARCH_FLAGS?=-march=mips64 .else ARCH_FLAGS?=-march=mips32 .endif ARCH_FLAGS+=-mabi=${MIPS_ABI} EXTRA_FLAGS=-fno-pic -mno-abicalls -G0 -DKERNLOADADDR=${KERNLOADADDR} EXTRA_FLAGS+=-${MIPS_ENDIAN} -HACK_EXTRA_FLAGS=-shared - # We add the -fno-pic flag to kernels because otherwise performance # is extremely poor, as well as -mno-abicalls to force no ABI usage. CFLAGS+=${EXTRA_FLAGS} $(ARCH_FLAGS) -HACK_EXTRA_FLAGS+=${EXTRA_FLAGS} $(ARCH_FLAGS) TRAMP_ARCH_FLAGS?=$(ARCH_FLAGS) TRAMP_EXTRA_FLAGS=${EXTRA_FLAGS} ${TRAMP_ARCH_FLAGS} # Kernel code is always compiled with soft-float on MIPS TRAMP_EXTRA_FLAGS+=-msoft-float .if ${MACHINE_ARCH:Mmips64*} != "" TRAMP_ELFSIZE=64 .else TRAMP_ELFSIZE=32 .endif ASM_CFLAGS+=${CFLAGS} -D_LOCORE -DLOCORE .if !defined(WITHOUT_KERNEL_TRAMPOLINE) KERNEL_EXTRA=trampoline KERNEL_EXTRA_INSTALL=${KERNEL_KO}.tramp.bin trampoline: ${KERNEL_KO}.tramp.bin ${KERNEL_KO}.tramp.bin: ${KERNEL_KO} $S/$M/$M/elf_trampoline.c \ $S/$M/$M/inckern.S ${OBJCOPY} --strip-symbol '$$d' --strip-symbol '$$a' \ -g --strip-symbol '$$t' ${FULLKERNEL} ${KERNEL_KO}.tmp sed -e s/${KERNLOADADDR}/${TRAMPLOADADDR}/ -e s/" + SIZEOF_HEADERS"// \ ${LDSCRIPT_NAME} > ${LDSCRIPT_NAME}.tramp.noheader ${CC} -O -nostdlib -I. -I$S ${TRAMP_EXTRA_FLAGS} ${TRAMP_LDFLAGS} -Xlinker \ -T -Xlinker ${LDSCRIPT_NAME}.tramp.noheader \ -DKERNNAME="\"${KERNEL_KO}.tmp\"" -DELFSIZE=${TRAMP_ELFSIZE} \ -fno-asynchronous-unwind-tables \ $S/$M/$M/inckern.S $S/$M/$M/elf_trampoline.c \ -o ${KERNEL_KO}.tramp.elf ${OBJCOPY} -S -O binary ${KERNEL_KO}.tramp.elf \ ${KERNEL_KO}.tramp.bin .endif %BEFORE_DEPEND %OBJS %FILES.c %FILES.s %FILES.m %CLEAN CLEAN+= ${LDSCRIPT_NAME} ${LDSCRIPT_NAME}.tramp.noheader \ ${KERNEL_KO}.tramp.elf ${KERNEL_KO}.tramp.bin ${LDSCRIPT_NAME}: $S/conf/${LDSCRIPT_NAME} sed s/KERNLOADADDR/${KERNLOADADDR}/g $S/conf/${LDSCRIPT_NAME} \ > ${LDSCRIPT_NAME} %RULES .include "$S/conf/kern.post.mk" Index: projects/clang1000-import/sys/conf/Makefile.powerpc =================================================================== --- projects/clang1000-import/sys/conf/Makefile.powerpc (revision 357178) +++ projects/clang1000-import/sys/conf/Makefile.powerpc (revision 357179) @@ -1,83 +1,82 @@ # Makefile.powerpc -- with config changes. # Copyright 1990 W. Jolitz # from: @(#)Makefile.i386 7.1 5/10/91 # $FreeBSD$ # # Makefile for FreeBSD # # This makefile is constructed from a machine description: # config machineid # Most changes should be made in the machine description # /sys/powerpc/conf/``machineid'' # after which you should do # config machineid # Generic makefile changes should be made in # /sys/conf/Makefile.powerpc # after which config should be rerun for all machines. # # Which version of config(8) is required. %VERSREQ= 600012 STD8X16FONT?= iso .if !defined(S) .if exists(./@/.) S= ./@ .else S= ../../.. .endif .endif LDSCRIPT_NAME?= ldscript.${MACHINE_ARCH} .include "$S/conf/kern.pre.mk" INCLUDES+= -I$S/contrib/libfdt .if "${MACHINE_ARCH}" == "powerpcspe" # Force __SPE__, since the builtin will be removed later with -mno-spe CFLAGS.gcc+= -mabi=spe -D__SPE__ CFLAGS.clang+= -mspe -D__SPE__ -m32 -HACK_EXTRA_FLAGS= -shared -m32 -mspe -D__SPE__ .endif CFLAGS+= -msoft-float CFLAGS.gcc+= -Wa,-many # Apply compiler-specific DPAA exceptions. 
.if "${COMPILER_TYPE}" == "clang" DPAAWARNFLAGS += \ -Wno-error=parentheses-equality \ -Wno-error=self-assign \ -Wno-error=incompatible-pointer-types-discards-qualifiers \ -Wno-error=non-literal-null-conversion \ -Wno-error=enum-conversion .elif "${COMPILER_TYPE}" == "gcc" && ${COMPILER_VERSION} >= 50200 DPAAWARNFLAGS += \ -Wno-error=redundant-decls \ -Wno-error=int-in-bool-context .endif # Build position-independent kernel CFLAGS+= -fPIC LDFLAGS+= -pie .if !empty(DDB_ENABLED) CFLAGS+= -fno-omit-frame-pointer .endif %BEFORE_DEPEND %OBJS %FILES.c %FILES.s %FILES.m %CLEAN %RULES .include "$S/conf/kern.post.mk" Index: projects/clang1000-import/sys/conf/kern.post.mk =================================================================== --- projects/clang1000-import/sys/conf/kern.post.mk (revision 357178) +++ projects/clang1000-import/sys/conf/kern.post.mk (revision 357179) @@ -1,488 +1,487 @@ # $FreeBSD$ # Part of a unified Makefile for building kernels. This part includes all # the definitions that need to be after all the % directives except %RULES # and ones that act like they are part of %RULES. # # Most make variables should not be defined in this file. Instead, they # should be defined in the kern.pre.mk so that port makefiles can # override or augment them. .if defined(DTS) || defined(DTSO) || defined(FDT_DTS_FILE) .include "dtb.build.mk" KERNEL_EXTRA+= ${DTB} ${DTBO} CLEAN+= ${DTB} ${DTBO} kernel-install: _dtbinstall .ORDER: beforeinstall _dtbinstall .endif # In case the config had a makeoptions DESTDIR... .if defined(DESTDIR) MKMODULESENV+= DESTDIR="${DESTDIR}" .endif SYSDIR?= ${S:C;^[^/];${.CURDIR}/&;:tA} MKMODULESENV+= KERNBUILDDIR="${.CURDIR}" SYSDIR="${SYSDIR}" MKMODULESENV+= MODULE_TIED=yes .if defined(CONF_CFLAGS) MKMODULESENV+= CONF_CFLAGS="${CONF_CFLAGS}" .endif .if defined(WITH_CTF) MKMODULESENV+= WITH_CTF="${WITH_CTF}" .endif .if defined(WITH_EXTRA_TCP_STACKS) MKMODULESENV+= WITH_EXTRA_TCP_STACKS="${WITH_EXTRA_TCP_STACKS}" .endif .if defined(KCSAN_ENABLED) MKMODULESENV+= KCSAN_ENABLED="yes" .endif .if defined(SAN_CFLAGS) MKMODULESENV+= SAN_CFLAGS="${SAN_CFLAGS}" .endif .if defined(GCOV_CFLAGS) MKMODULESENV+= GCOV_CFLAGS="${GCOV_CFLAGS}" .endif # Allow overriding the kernel debug directory, so kernel and user debug may be # installed in different directories. Setting it to "" restores the historical # behavior of installing debug files in the kernel directory. KERN_DEBUGDIR?= ${DEBUGDIR} .MAIN: all .if !defined(NO_MODULES) # Default prefix used for modules installed from ports LOCALBASE?= /usr/local LOCAL_MODULES_DIR?= ${LOCALBASE}/sys/modules # Default to installing all modules installed by ports unless overridden # by the user. 
.if !defined(LOCAL_MODULES) && exists(${LOCAL_MODULES_DIR}) LOCAL_MODULES!= ls ${LOCAL_MODULES_DIR} .endif .endif .for target in all clean cleandepend cleandir clobber depend install \ ${_obj} reinstall tags ${target}: kernel-${target} .if !defined(NO_MODULES) ${target}: modules-${target} modules-${target}: .if !defined(MODULES_WITH_WORLD) && exists($S/modules) cd $S/modules; ${MKMODULESENV} ${MAKE} \ ${target:S/^reinstall$/install/:S/^clobber$/cleandir/} .endif .for module in ${LOCAL_MODULES} @${ECHODIR} "===> ${module} (${target:S/^reinstall$/install/:S/^clobber$/cleandir/})" @cd ${LOCAL_MODULES_DIR}/${module}; ${MKMODULESENV} ${MAKE} \ DIRPRFX="${module}/" \ ${target:S/^reinstall$/install/:S/^clobber$/cleandir/} .endfor .endif .endfor # Handle ports (as defined by the user) that build kernel modules .if !defined(NO_MODULES) && defined(PORTS_MODULES) # # The ports tree needs some environment variables defined to match the new kernel # # SRC_BASE is how the ports tree refers to the location of the base source files .if !defined(SRC_BASE) SRC_BASE= ${SYSDIR:H:tA} .endif # OSVERSION is used by some ports to determine build options .if !defined(OSRELDATE) # Definition copied from src/Makefile.inc1 OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${MAKEOBJDIRPREFIX}${SRC_BASE}/include/osreldate.h .endif # Keep the related ports builds in the obj directory so that they are only rebuilt once per kernel build # # Ports search for some dependencies in PATH, so add the location of the # installed files WRKDIRPREFIX?= ${.OBJDIR} PORTSMODULESENV=\ env \ -u CC \ -u CXX \ -u CPP \ -u MAKESYSPATH \ -u MK_AUTO_OBJ \ -u MAKEOBJDIR \ MAKEFLAGS="${MAKEFLAGS:M*:tW:S/^-m /-m_/g:S/ -m / -m_/g:tw:N-m_*:NMK_AUTO_OBJ=*}" \ SYSDIR=${SYSDIR} \ PATH=${PATH}:${LOCALBASE}/bin:${LOCALBASE}/sbin \ SRC_BASE=${SRC_BASE} \ OSVERSION=${OSRELDATE} \ WRKDIRPREFIX=${WRKDIRPREFIX} # The WRKDIR needs to be cleaned before building, and trying to change the target # with a :C pattern below results in install -> instclean all: .for __i in ${PORTS_MODULES} @${ECHO} "===> Ports module ${__i} (all)" cd $${PORTSDIR:-/usr/ports}/${__i}; ${PORTSMODULESENV} ${MAKE} -B clean build .endfor .for __target in install reinstall clean ${__target}: ports-${__target} ports-${__target}: .for __i in ${PORTS_MODULES} @${ECHO} "===> Ports module ${__i} (${__target})" cd $${PORTSDIR:-/usr/ports}/${__i}; ${PORTSMODULESENV} ${MAKE} -B ${__target:C/(re)?install/deinstall reinstall/} .endfor .endfor .endif .ORDER: kernel-install modules-install beforebuild: .PHONY kernel-all: beforebuild .WAIT ${KERNEL_KO} ${KERNEL_EXTRA} kernel-cleandir: kernel-clean kernel-cleandepend kernel-clobber: find . -maxdepth 1 ! -type d ! -name version -delete kernel-obj: .if !defined(NO_MODULES) modules: modules-all modules-depend: beforebuild modules-all: beforebuild .if !defined(NO_MODULES_OBJ) modules-all modules-depend: modules-obj .endif .endif .if !defined(DEBUG) FULLKERNEL= ${KERNEL_KO} .else FULLKERNEL= ${KERNEL_KO}.full ${KERNEL_KO}: ${FULLKERNEL} ${KERNEL_KO}.debug ${OBJCOPY} --strip-debug --add-gnu-debuglink=${KERNEL_KO}.debug \ ${FULLKERNEL} ${.TARGET} ${KERNEL_KO}.debug: ${FULLKERNEL} ${OBJCOPY} --only-keep-debug ${FULLKERNEL} ${.TARGET} install.debug reinstall.debug: gdbinit cd ${.CURDIR}; ${MAKE} ${.TARGET:R} # Install gdbinit files for kernel debugging. 
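The MAKEFLAGS filter in PORTSMODULESENV above is dense: it temporarily glues each "-m <path>" pair into one word, drops those words together with any MK_AUTO_OBJ=* knob, and passes the rest through, so a port's module build does not inherit kernel-build-only settings. A rough stand-alone C model of the effect (hypothetical helper, not part of the build):

#include <stdio.h>
#include <string.h>

/* Drop "-m <path>" pairs and MK_AUTO_OBJ=* assignments, keep the rest. */
static void
scrub_makeflags(int nflags, const char **flags)
{
	int i;

	for (i = 0; i < nflags; i++) {
		if (strcmp(flags[i], "-m") == 0) {
			i++;		/* also skip the path that follows */
			continue;
		}
		if (strncmp(flags[i], "MK_AUTO_OBJ=", 12) == 0)
			continue;
		printf("%s ", flags[i]);
	}
	printf("\n");
}

int
main(void)
{
	const char *flags[] =
	    { "-j8", "-m", "/usr/src/share/mk", "MK_AUTO_OBJ=yes" };

	scrub_makeflags(4, flags);	/* prints only "-j8" */
	return (0);
}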
gdbinit: grep -v '# XXX' ${S}/../tools/debugscripts/dot.gdbinit | \ sed "s:MODPATH:${.OBJDIR}/modules:" > .gdbinit cp ${S}/../tools/debugscripts/gdbinit.kernel ${.CURDIR} .if exists(${S}/../tools/debugscripts/gdbinit.${MACHINE_CPUARCH}) cp ${S}/../tools/debugscripts/gdbinit.${MACHINE_CPUARCH} \ ${.CURDIR}/gdbinit.machine .endif .endif ${FULLKERNEL}: ${SYSTEM_DEP} vers.o @rm -f ${.TARGET} @echo linking ${.TARGET} ${SYSTEM_LD} .if !empty(MD_ROOT_SIZE_CONFIGURED) && defined(MFS_IMAGE) @sh ${S}/tools/embed_mfs.sh ${.TARGET} ${MFS_IMAGE} .endif .if ${MK_CTF} != "no" @echo ${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ... @${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ${SYSTEM_OBJS} vers.o .endif .if !defined(DEBUG) ${OBJCOPY} --strip-debug ${.TARGET} .endif ${SYSTEM_LD_TAIL} OBJS_DEPEND_GUESS+= offset.inc assym.inc vnode_if.h ${BEFORE_DEPEND:M*.h} \ ${MFILES:T:S/.m$/.h/} .for mfile in ${MFILES} # XXX the low quality .m.o rules generated by config are normally used # instead of the .m.c rules here. ${mfile:T:S/.m$/.c/}: ${mfile} ${AWK} -f $S/tools/makeobjops.awk ${mfile} -c ${mfile:T:S/.m$/.h/}: ${mfile} ${AWK} -f $S/tools/makeobjops.awk ${mfile} -h .endfor kernel-clean: rm -f *.o *.so *.pico *.ko *.s eddep errs \ ${FULLKERNEL} ${KERNEL_KO} ${KERNEL_KO}.debug \ tags vers.c \ vnode_if.c vnode_if.h vnode_if_newproto.h vnode_if_typedef.h \ ${MFILES:T:S/.m$/.c/} ${MFILES:T:S/.m$/.h/} \ ${CLEAN} # This is a hack. BFD "optimizes" away dynamic mode if there are no # dynamic references. We could probably do a '-Bforcedynamic' mode like # in the a.out ld. For now, this works. -HACK_EXTRA_FLAGS?= -shared hack.pico: Makefile :> hack.c - ${CC} ${HACK_EXTRA_FLAGS} -nostdlib hack.c -o hack.pico + ${CC} -shared ${CFLAGS} -nostdlib hack.c -o hack.pico rm -f hack.c offset.inc: $S/kern/genoffset.sh genoffset.o NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genoffset.sh genoffset.o > ${.TARGET} genoffset.o: $S/kern/genoffset.c ${CC} -c ${CFLAGS:N-flto:N-fno-common} $S/kern/genoffset.c # genoffset_test.o is not actually used for anything - the point of compiling it # is to exercise the CTASSERT that checks that the offsets in the offset.inc # _lite struct(s) match those in the original(s). genoffset_test.o: $S/kern/genoffset.c offset.inc ${CC} -c ${CFLAGS:N-flto:N-fno-common} -DOFFSET_TEST \ $S/kern/genoffset.c -o ${.TARGET} assym.inc: $S/kern/genassym.sh genassym.o genoffset_test.o NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genassym.sh genassym.o > ${.TARGET} genassym.o: $S/$M/$M/genassym.c offset.inc ${CC} -c ${CFLAGS:N-flto:N-fno-common} $S/$M/$M/genassym.c OBJS_DEPEND_GUESS+= opt_global.h genoffset.o genassym.o vers.o: opt_global.h .if !empty(.MAKE.MODE:Unormal:Mmeta) && empty(.MAKE.MODE:Unormal:Mnofilemon) _meta_filemon= 1 .endif # Skip reading .depend when not needed to speed up tree-walks and simple # lookups. For install, only do this if no other targets are specified. # Also skip generating or including .depend.* files if in meta+filemon mode # since it will track dependencies itself. OBJS_DEPEND_GUESS is still used # for _meta_filemon but not for _SKIP_DEPEND.
.if !defined(NO_SKIP_DEPEND) && \ ((!empty(.MAKEFLAGS:M-V) && empty(.MAKEFLAGS:M*DEP*)) || \ ${.TARGETS:M*obj} == ${.TARGETS} || \ ${.TARGETS:M*clean*} == ${.TARGETS} || \ ${.TARGETS:M*install*} == ${.TARGETS}) _SKIP_DEPEND= 1 .endif .if defined(_SKIP_DEPEND) || defined(_meta_filemon) .MAKE.DEPENDFILE= /dev/null .endif kernel-depend: .depend SRCS= assym.inc offset.inc vnode_if.h ${BEFORE_DEPEND} ${CFILES} \ ${SYSTEM_CFILES} ${GEN_CFILES} ${SFILES} \ ${MFILES:T:S/.m$/.h/} DEPENDOBJS+= ${SYSTEM_OBJS} genassym.o genoffset.o genoffset_test.o DEPENDOBJS+= ${CLEAN:M*.o} DEPENDFILES= ${DEPENDOBJS:O:u:C/^/.depend./} .if ${MAKE_VERSION} < 20160220 DEPEND_MP?= -MP .endif .if defined(_SKIP_DEPEND) # Don't bother reading any .meta files ${DEPENDOBJS}: .NOMETA .depend: .NOMETA # Unset these to avoid looping/statting on them later. .undef DEPENDOBJS .undef DEPENDFILES .endif # defined(_SKIP_DEPEND) DEPEND_CFLAGS+= -MD ${DEPEND_MP} -MF.depend.${.TARGET} DEPEND_CFLAGS+= -MT${.TARGET} .if !defined(_meta_filemon) .if !empty(DEPEND_CFLAGS) # Only add in DEPEND_CFLAGS for CFLAGS on files we expect from DEPENDOBJS # as those are the only ones we will include. DEPEND_CFLAGS_CONDITION= "${DEPENDOBJS:M${.TARGET}}" != "" CFLAGS+= ${${DEPEND_CFLAGS_CONDITION}:?${DEPEND_CFLAGS}:} .endif .for __depend_obj in ${DEPENDFILES} .if ${MAKE_VERSION} < 20160220 .sinclude "${.OBJDIR}/${__depend_obj}" .else .dinclude "${.OBJDIR}/${__depend_obj}" .endif .endfor .endif # !defined(_meta_filemon) # Always run 'make depend' to generate dependencies early and to avoid the # need for manually running it. For the kernel this is mostly a NOP since # all dependencies are correctly added or accounted for. This is mostly to # ensure downstream uses of kernel-depend are handled. beforebuild: kernel-depend # Guess some dependencies for when no ${DEPENDFILE}.OBJ is generated yet. # For meta+filemon the .meta file is checked for since it is the dependency # file used. .for __obj in ${DEPENDOBJS:O:u} .if defined(_meta_filemon) _depfile= ${.OBJDIR}/${__obj}.meta .else _depfile= ${.OBJDIR}/.depend.${__obj} .endif .if !exists(${_depfile}) .if ${SYSTEM_OBJS:M${__obj}} ${__obj}: ${OBJS_DEPEND_GUESS} .endif ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .elif defined(_meta_filemon) # For meta mode we still need to know which file to depend on to avoid # ambiguous suffix transformation rules from .PATH. Meta mode does not # use .depend files. We really only need source files, not headers since # they are typically in SRCS/beforebuild already. For target-specific # guesses do include headers though since they may not be in SRCS. .if ${SYSTEM_OBJS:M${__obj}} ${__obj}: ${OBJS_DEPEND_GUESS:N*.h} .endif ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .endif # !exists(${_depfile}) .endfor .NOPATH: .depend ${DEPENDFILES} .depend: .PRECIOUS ${SRCS} .if ${COMPILER_TYPE} == "clang" || \ (${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 60000) _MAP_DEBUG_PREFIX= yes .endif _ILINKS= machine .if ${MACHINE} != ${MACHINE_CPUARCH} && ${MACHINE} != "arm64" _ILINKS+= ${MACHINE_CPUARCH} .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _ILINKS+= x86 .endif # Ensure that the link exists without depending on it when it exists. # Ensure that debug info references the path in the source tree. 
.for _link in ${_ILINKS} .if !exists(${.OBJDIR}/${_link}) ${SRCS} ${DEPENDOBJS}: ${_link} .endif .if defined(_MAP_DEBUG_PREFIX) .if ${_link} == "machine" CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include .else CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include .endif .endif .endfor ${_ILINKS}: @case ${.TARGET} in \ machine) \ path=${S}/${MACHINE}/include ;; \ *) \ path=${S}/${.TARGET}/include ;; \ esac ; \ ${ECHO} ${.TARGET} "->" $$path ; \ ln -fns $$path ${.TARGET} # .depend needs include links so we remove them only together. kernel-cleandepend: .PHONY rm -f .depend .depend.* ${_ILINKS} kernel-tags: @ls .depend.* > /dev/null 2>&1 || \ { echo "you must make depend first"; exit 1; } sh $S/conf/systags.sh kernel-install: .PHONY @if [ ! -f ${KERNEL_KO} ] ; then \ echo "You must build a kernel first." ; \ exit 1 ; \ fi .if exists(${DESTDIR}${KODIR}) -thiskernel=`sysctl -n kern.bootfile` ; \ if [ ! "`dirname "$$thiskernel"`" -ef ${DESTDIR}${KODIR} ] ; then \ chflags -R noschg ${DESTDIR}${KODIR} ; \ rm -rf ${DESTDIR}${KODIR} ; \ rm -rf ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ; \ else \ if [ -d ${DESTDIR}${KODIR}.old ] ; then \ chflags -R noschg ${DESTDIR}${KODIR}.old ; \ rm -rf ${DESTDIR}${KODIR}.old ; \ fi ; \ mv ${DESTDIR}${KODIR} ${DESTDIR}${KODIR}.old ; \ if [ -n "${KERN_DEBUGDIR}" -a \ -d ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ]; then \ rm -rf ${DESTDIR}${KERN_DEBUGDIR}${KODIR}.old ; \ mv ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ${DESTDIR}${KERN_DEBUGDIR}${KODIR}.old ; \ fi ; \ sysctl kern.bootfile=${DESTDIR}${KODIR}.old/"`basename "$$thiskernel"`" ; \ fi .endif mkdir -p ${DESTDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO} ${DESTDIR}${KODIR}/ .if defined(DEBUG) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" mkdir -p ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.debug ${DESTDIR}${KERN_DEBUGDIR}${KODIR}/ .endif .if defined(KERNEL_EXTRA_INSTALL) ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_EXTRA_INSTALL} ${DESTDIR}${KODIR}/ .endif kernel-reinstall: @-chflags -R noschg ${DESTDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO} ${DESTDIR}${KODIR}/ .if defined(DEBUG) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.debug ${DESTDIR}${KERN_DEBUGDIR}${KODIR}/ .endif config.o env.o hints.o vers.o vnode_if.o: ${NORMAL_C} ${NORMAL_CTFCONVERT} .if ${MK_REPRODUCIBLE_BUILD} != "no" REPRO_FLAG="-R" .endif vers.c: $S/conf/newvers.sh $S/sys/param.h ${SYSTEM_DEP} MAKE="${MAKE}" sh $S/conf/newvers.sh ${REPRO_FLAG} ${KERN_IDENT} vnode_if.c: $S/tools/vnode_if.awk $S/kern/vnode_if.src ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -c vnode_if.h vnode_if_newproto.h vnode_if_typedef.h: $S/tools/vnode_if.awk \ $S/kern/vnode_if.src vnode_if.h: vnode_if_newproto.h vnode_if_typedef.h ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -h vnode_if_newproto.h: ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -p vnode_if_typedef.h: ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -q .if ${MFS_IMAGE:Uno} != "no" .if empty(MD_ROOT_SIZE_CONFIGURED) # Generate an object file from the file system image to embed in the kernel # via linking. Make sure the contents are in the mfs section and rename the # start/end/size variables to __start_mfs, __stop_mfs, and mfs_size, # respectively. 
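Note that the rule below actually exports the image under the names given to the --redefine-sym calls (mfs_root, mfs_root_end, __mfs_root_size) rather than the __start_mfs/__stop_mfs names mentioned in the comment; the kernel side consumes them roughly as in this sketch (assumed usage; it compiles stand-alone but links only against the generated embedfs object):

#include <stddef.h>

extern char mfs_root[];		/* first byte of the embedded image */
extern char mfs_root_end[];	/* one past the last byte */

size_t
mfs_image_size(void)
{
	return ((size_t)(mfs_root_end - mfs_root));
}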
embedfs_${MFS_IMAGE:T:R}.o: ${MFS_IMAGE} ${OBJCOPY} --input-target binary \ --output-target ${EMBEDFS_FORMAT.${MACHINE_ARCH}} \ --binary-architecture ${EMBEDFS_ARCH.${MACHINE_ARCH}} \ ${MFS_IMAGE} ${.TARGET} ${OBJCOPY} \ --rename-section .data=mfs,contents,alloc,load,readonly,data \ --redefine-sym \ _binary_${MFS_IMAGE:C,[^[:alnum:]],_,g}_size=__mfs_root_size \ --redefine-sym \ _binary_${MFS_IMAGE:C,[^[:alnum:]],_,g}_start=mfs_root \ --redefine-sym \ _binary_${MFS_IMAGE:C,[^[:alnum:]],_,g}_end=mfs_root_end \ ${.TARGET} .endif .endif # XXX strictly, everything depends on Makefile because changes to ${PROF} # only appear there, but we don't handle that. .include "kern.mk" Index: projects/clang1000-import/sys/dev/mrsas/mrsas_cam.c =================================================================== --- projects/clang1000-import/sys/dev/mrsas/mrsas_cam.c (revision 357178) +++ projects/clang1000-import/sys/dev/mrsas/mrsas_cam.c (revision 357179) @@ -1,2154 +1,2155 @@ /* * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Marian Choy * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Marian Choy * Support: freebsdraid@avagotech.com * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 2. Redistributions * in binary form must reproduce the above copyright notice, this list of * conditions and the following disclaimer in the documentation and/or other * materials provided with the distribution. 3. Neither the name of the * nor the names of its contributors may be used to endorse or * promote products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "dev/mrsas/mrsas.h" #include #include #include #include #include #include #include #include #include #include #include #include /* XXX for pcpu.h */ #include /* XXX for PCPU_GET */ #define smp_processor_id() PCPU_GET(cpuid) /* * Function prototypes */ int mrsas_cam_attach(struct mrsas_softc *sc); int mrsas_find_io_type(struct cam_sim *sim, union ccb *ccb); int mrsas_bus_scan(struct mrsas_softc *sc); int mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim); int mrsas_map_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb); int mrsas_build_ldio_rw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb); int mrsas_build_ldio_nonrw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb); int mrsas_build_syspdio(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, struct cam_sim *sim, u_int8_t fp_possible); int mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, u_int32_t device_id, MRSAS_RAID_SCSI_IO_REQUEST * io_request); void mrsas_xpt_freeze(struct mrsas_softc *sc); void mrsas_xpt_release(struct mrsas_softc *sc); void mrsas_cam_detach(struct mrsas_softc *sc); void mrsas_release_mpt_cmd(struct mrsas_mpt_cmd *cmd); void mrsas_unmap_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd); void mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd); void mrsas_fire_cmd(struct mrsas_softc *sc, u_int32_t req_desc_lo, u_int32_t req_desc_hi); void mrsas_set_pd_lba(MRSAS_RAID_SCSI_IO_REQUEST * io_request, u_int8_t cdb_len, struct IO_REQUEST_INFO *io_info, union ccb *ccb, MR_DRV_RAID_MAP_ALL * local_map_ptr, u_int32_t ref_tag, u_int32_t ld_block_size); static void mrsas_freeze_simq(struct mrsas_mpt_cmd *cmd, struct cam_sim *sim); static void mrsas_cam_poll(struct cam_sim *sim); static void mrsas_action(struct cam_sim *sim, union ccb *ccb); static void mrsas_scsiio_timeout(void *data); static int mrsas_track_scsiio(struct mrsas_softc *sc, target_id_t id, u_int32_t bus_id); static void mrsas_tm_response_code(struct mrsas_softc *sc, MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply); static int mrsas_issue_tm(struct mrsas_softc *sc, MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc); static void mrsas_data_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int32_t mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim, union ccb *ccb); static boolean_t mrsas_is_prp_possible(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nsegs); static void mrsas_build_ieee_sgl(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg); static void mrsas_build_prp_nvme(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg); struct mrsas_mpt_cmd *mrsas_get_mpt_cmd(struct mrsas_softc *sc); MRSAS_REQUEST_DESCRIPTOR_UNION * mrsas_get_request_desc(struct mrsas_softc *sc, u_int16_t index); extern int mrsas_reset_targets(struct mrsas_softc *sc); extern u_int16_t MR_TargetIdToLdGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map); extern u_int32_t MR_LdBlockSizeGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map); extern void mrsas_isr(void *arg); extern void mrsas_aen_handler(struct mrsas_softc *sc); extern u_int8_t MR_BuildRaidContext(struct mrsas_softc *sc, struct IO_REQUEST_INFO *io_info, RAID_CONTEXT * pRAID_Context, MR_DRV_RAID_MAP_ALL * map); extern u_int16_t MR_LdSpanArrayGet(u_int32_t ld, u_int32_t span, MR_DRV_RAID_MAP_ALL * map); extern u_int16_t mrsas_get_updated_dev_handle(struct mrsas_softc *sc, PLD_LOAD_BALANCE_INFO lbInfo, struct 
IO_REQUEST_INFO *io_info); extern int mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t MSIxIndex); extern MR_LD_RAID *MR_LdRaidGet(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map); extern void mrsas_disable_intr(struct mrsas_softc *sc); extern void mrsas_enable_intr(struct mrsas_softc *sc); void mrsas_prepare_secondRaid1_IO(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd); /* * mrsas_cam_attach: Main entry to CAM subsystem * input: Adapter instance soft state * * This function is called from mrsas_attach() during initialization to perform * SIM allocations and XPT bus registration. If the kernel version is 7.4 or * earlier, it would also initiate a bus scan. */ int mrsas_cam_attach(struct mrsas_softc *sc) { struct cam_devq *devq; int mrsas_cam_depth; mrsas_cam_depth = sc->max_scsi_cmds; if ((devq = cam_simq_alloc(mrsas_cam_depth)) == NULL) { device_printf(sc->mrsas_dev, "Cannot allocate SIM queue\n"); return (ENOMEM); } /* * Create SIM for bus 0 and register, also create path */ sc->sim_0 = cam_sim_alloc(mrsas_action, mrsas_cam_poll, "mrsas", sc, device_get_unit(sc->mrsas_dev), &sc->sim_lock, mrsas_cam_depth, mrsas_cam_depth, devq); if (sc->sim_0 == NULL) { cam_simq_free(devq); device_printf(sc->mrsas_dev, "Cannot register SIM\n"); return (ENXIO); } /* Initialize taskqueue for Event Handling */ TASK_INIT(&sc->ev_task, 0, (void *)mrsas_aen_handler, sc); sc->ev_tq = taskqueue_create("mrsas_taskq", M_NOWAIT | M_ZERO, taskqueue_thread_enqueue, &sc->ev_tq); /* Run the task queue with lowest priority */ taskqueue_start_threads(&sc->ev_tq, 1, 255, "%s taskq", device_get_nameunit(sc->mrsas_dev)); mtx_lock(&sc->sim_lock); if (xpt_bus_register(sc->sim_0, sc->mrsas_dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->sim_0, TRUE); /* passing true frees the devq */ mtx_unlock(&sc->sim_lock); return (ENXIO); } if (xpt_create_path(&sc->path_0, NULL, cam_sim_path(sc->sim_0), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->sim_0)); cam_sim_free(sc->sim_0, TRUE); /* passing true will free the * devq */ mtx_unlock(&sc->sim_lock); return (ENXIO); } mtx_unlock(&sc->sim_lock); /* * Create SIM for bus 1 and register, also create path */ sc->sim_1 = cam_sim_alloc(mrsas_action, mrsas_cam_poll, "mrsas", sc, device_get_unit(sc->mrsas_dev), &sc->sim_lock, mrsas_cam_depth, mrsas_cam_depth, devq); if (sc->sim_1 == NULL) { cam_simq_free(devq); device_printf(sc->mrsas_dev, "Cannot register SIM\n"); return (ENXIO); } mtx_lock(&sc->sim_lock); if (xpt_bus_register(sc->sim_1, sc->mrsas_dev, 1) != CAM_SUCCESS) { cam_sim_free(sc->sim_1, TRUE); /* passing true frees the devq */ mtx_unlock(&sc->sim_lock); return (ENXIO); } if (xpt_create_path(&sc->path_1, NULL, cam_sim_path(sc->sim_1), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->sim_1)); cam_sim_free(sc->sim_1, TRUE); mtx_unlock(&sc->sim_lock); return (ENXIO); } mtx_unlock(&sc->sim_lock); #if (__FreeBSD_version <= 704000) if (mrsas_bus_scan(sc)) { device_printf(sc->mrsas_dev, "Error in bus scan.\n"); return (1); } #endif return (0); } /* * mrsas_cam_detach: De-allocates and teardown CAM * input: Adapter instance soft state * * De-registers and frees the paths and SIMs. 
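 * Note the asymmetric cam_sim_free() calls below: both SIMs share the devq * allocated in mrsas_cam_attach(), so it is freed only once, with the last SIM.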
*/ void mrsas_cam_detach(struct mrsas_softc *sc) { if (sc->ev_tq != NULL) taskqueue_free(sc->ev_tq); mtx_lock(&sc->sim_lock); if (sc->path_0) xpt_free_path(sc->path_0); if (sc->sim_0) { xpt_bus_deregister(cam_sim_path(sc->sim_0)); cam_sim_free(sc->sim_0, FALSE); } if (sc->path_1) xpt_free_path(sc->path_1); if (sc->sim_1) { xpt_bus_deregister(cam_sim_path(sc->sim_1)); cam_sim_free(sc->sim_1, TRUE); } mtx_unlock(&sc->sim_lock); } /* * mrsas_action: SIM callback entry point * input: pointer to SIM pointer to CAM Control Block * * This function processes CAM subsystem requests. The type of request is stored * in ccb->ccb_h.func_code. The preprocessor #ifdef is necessary because * ccb->cpi.maxio is not supported for FreeBSD version 7.4 or earlier. */ static void mrsas_action(struct cam_sim *sim, union ccb *ccb) { struct mrsas_softc *sc = (struct mrsas_softc *)cam_sim_softc(sim); struct ccb_hdr *ccb_h = &(ccb->ccb_h); u_int32_t device_id; /* * Check if the system going down * or the adapter is in unrecoverable critical error */ if (sc->remove_in_progress || (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; xpt_done(ccb); return; } switch (ccb->ccb_h.func_code) { case XPT_SCSI_IO: { device_id = ccb_h->target_id; /* * bus 0 is LD, bus 1 is for system-PD */ if (cam_sim_bus(sim) == 1 && sc->pd_list[device_id].driveState != MR_PD_STATE_SYSTEM) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; xpt_done(ccb); } else { if (mrsas_startio(sc, sim, ccb)) { ccb->ccb_h.status |= CAM_REQ_INVALID; xpt_done(ccb); } } break; } case XPT_ABORT: { ccb->ccb_h.status = CAM_UA_ABORT; xpt_done(ccb); break; } case XPT_RESET_BUS: { xpt_done(ccb); break; } case XPT_GET_TRAN_SETTINGS: { ccb->cts.protocol = PROTO_SCSI; ccb->cts.protocol_version = SCSI_REV_2; ccb->cts.transport = XPORT_SPI; ccb->cts.transport_version = 2; ccb->cts.xport_specific.spi.valid = CTS_SPI_VALID_DISC; ccb->cts.xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->cts.proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; ccb->cts.proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; xpt_done(ccb); break; } case XPT_CALC_GEOMETRY: { cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); break; } case XPT_PATH_INQ: { ccb->cpi.version_num = 1; ccb->cpi.hba_inquiry = 0; ccb->cpi.target_sprt = 0; #if (__FreeBSD_version >= 902001) ccb->cpi.hba_misc = PIM_UNMAPPED; #else ccb->cpi.hba_misc = 0; #endif ccb->cpi.hba_eng_cnt = 0; ccb->cpi.max_lun = MRSAS_SCSI_MAX_LUNS; ccb->cpi.unit_number = cam_sim_unit(sim); ccb->cpi.bus_id = cam_sim_bus(sim); ccb->cpi.initiator_id = MRSAS_SCSI_INITIATOR_ID; ccb->cpi.base_transfer_speed = 150000; strlcpy(ccb->cpi.sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(ccb->cpi.hba_vid, "AVAGO", HBA_IDLEN); strlcpy(ccb->cpi.dev_name, cam_sim_name(sim), DEV_IDLEN); ccb->cpi.transport = XPORT_SPI; ccb->cpi.transport_version = 2; ccb->cpi.protocol = PROTO_SCSI; ccb->cpi.protocol_version = SCSI_REV_2; if (ccb->cpi.bus_id == 0) ccb->cpi.max_target = MRSAS_MAX_PD - 1; else ccb->cpi.max_target = MRSAS_MAX_LD_IDS - 1; #if (__FreeBSD_version > 704000) ccb->cpi.maxio = sc->max_num_sge * MRSAS_PAGE_SIZE; #endif ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } default: { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } } } /* * mrsas_scsiio_timeout: Callback function for IO timed out * input: mpt command context * * This function will execute after timeout value provided by ccb header from * CAM 
layer, if timer expires. Driver will run timer for all DCMD and LDIO * coming from CAM layer. This function is the callback function for IO timeout * and it runs in no-sleep context. Set do_timedout_reset in Adapter context * so that it will execute OCR/Kill adapter from ocr_thread context. */ static void mrsas_scsiio_timeout(void *data) { struct mrsas_mpt_cmd *cmd; struct mrsas_softc *sc; u_int32_t target_id; if (!data) return; cmd = (struct mrsas_mpt_cmd *)data; sc = cmd->sc; if (cmd->ccb_ptr == NULL) { printf("command timeout with NULL ccb\n"); return; } /* * Below callout is dummy entry so that it will be cancelled from * mrsas_cmd_done(). Now Controller will go to OCR/Kill Adapter based * on OCR enable/disable property of Controller from ocr_thread * context. */ #if (__FreeBSD_version >= 1000510) callout_reset_sbt(&cmd->cm_callout, SBT_1S * 180, 0, mrsas_scsiio_timeout, cmd, 0); #else callout_reset(&cmd->cm_callout, (180000 * hz) / 1000, mrsas_scsiio_timeout, cmd); #endif if (cmd->ccb_ptr->cpi.bus_id == 0) target_id = cmd->ccb_ptr->ccb_h.target_id; else target_id = (cmd->ccb_ptr->ccb_h.target_id + (MRSAS_MAX_PD - 1)); /* Save the cmd to be processed for TM, if it is not there in the array */ if (sc->target_reset_pool[target_id] == NULL) { sc->target_reset_pool[target_id] = cmd; mrsas_atomic_inc(&sc->target_reset_outstanding); } return; } /* * mrsas_startio: SCSI IO entry point * input: Adapter instance soft state * pointer to CAM Control Block * * This function is the SCSI IO entry point and it initiates IO processing. It * copies the IO and, depending on whether the IO is read/write or inquiry, * calls mrsas_build_ldio() or mrsas_build_dcdb(), respectively. It returns 0 * if the command is sent to firmware successfully, otherwise it returns 1. */ static int32_t mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim, union ccb *ccb) { struct mrsas_mpt_cmd *cmd, *r1_cmd = NULL; struct ccb_hdr *ccb_h = &(ccb->ccb_h); struct ccb_scsiio *csio = &(ccb->csio); MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc; u_int8_t cmd_type; if ((csio->cdb_io.cdb_bytes[0]) == SYNCHRONIZE_CACHE && (!sc->fw_sync_cache_support)) { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return (0); } ccb_h->status |= CAM_SIM_QUEUED; if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) { ccb_h->status |= CAM_REQUEUE_REQ; xpt_done(ccb); mrsas_atomic_dec(&sc->fw_outstanding); return (0); } cmd = mrsas_get_mpt_cmd(sc); if (!cmd) { ccb_h->status |= CAM_REQUEUE_REQ; xpt_done(ccb); mrsas_atomic_dec(&sc->fw_outstanding); return (0); } if ((ccb_h->flags & CAM_DIR_MASK) != CAM_DIR_NONE) { if (ccb_h->flags & CAM_DIR_IN) cmd->flags |= MRSAS_DIR_IN; if (ccb_h->flags & CAM_DIR_OUT) cmd->flags |= MRSAS_DIR_OUT; } else cmd->flags = MRSAS_DIR_NONE; /* no data */ /* For FreeBSD 9.2 and higher */ #if (__FreeBSD_version >= 902001) /* * XXX We don't yet support physical addresses here.
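 * Requests carrying CAM_DATA_PADDR or CAM_DATA_SG_PADDR (and, in this driver, * CAM_DATA_SG) are rejected with CAM_REQ_INVALID in the switch below; only * CAM_DATA_VADDR and CAM_DATA_BIO are accepted and mapped.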
*/ switch ((ccb->ccb_h.flags & CAM_DATA_MASK)) { case CAM_DATA_PADDR: case CAM_DATA_SG_PADDR: device_printf(sc->mrsas_dev, "%s: physical addresses not supported\n", __func__); mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; ccb_h->status &= ~CAM_SIM_QUEUED; goto done; case CAM_DATA_SG: device_printf(sc->mrsas_dev, "%s: scatter gather is not supported\n", __func__); mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; goto done; case CAM_DATA_VADDR: if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_TOO_BIG; goto done; } cmd->length = csio->dxfer_len; if (cmd->length) cmd->data = csio->data_ptr; break; case CAM_DATA_BIO: if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_TOO_BIG; goto done; } cmd->length = csio->dxfer_len; if (cmd->length) cmd->data = csio->data_ptr; break; default: ccb->ccb_h.status = CAM_REQ_INVALID; goto done; } #else if (!(ccb_h->flags & CAM_DATA_PHYS)) { /* Virtual data address */ if (!(ccb_h->flags & CAM_SCATTER_VALID)) { if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_TOO_BIG; goto done; } cmd->length = csio->dxfer_len; if (cmd->length) cmd->data = csio->data_ptr; } else { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; goto done; } } else { /* Data addresses are physical. */ mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; ccb_h->status &= ~CAM_SIM_QUEUED; goto done; } #endif /* save ccb ptr */ cmd->ccb_ptr = ccb; req_desc = mrsas_get_request_desc(sc, (cmd->index) - 1); if (!req_desc) { device_printf(sc->mrsas_dev, "Cannot get request_descriptor.\n"); return (FAIL); } memset(req_desc, 0, sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION)); cmd->request_desc = req_desc; if (ccb_h->flags & CAM_CDB_POINTER) bcopy(csio->cdb_io.cdb_ptr, cmd->io_request->CDB.CDB32, csio->cdb_len); else bcopy(csio->cdb_io.cdb_bytes, cmd->io_request->CDB.CDB32, csio->cdb_len); mtx_lock(&sc->raidmap_lock); /* Check for IO type READ-WRITE targeted for Logical Volume */ cmd_type = mrsas_find_io_type(sim, ccb); switch (cmd_type) { case READ_WRITE_LDIO: /* Build READ-WRITE IO for Logical Volume */ if (mrsas_build_ldio_rw(sc, cmd, ccb)) { device_printf(sc->mrsas_dev, "Build RW LDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } break; case NON_READ_WRITE_LDIO: /* Build NON READ-WRITE IO for Logical Volume */ if (mrsas_build_ldio_nonrw(sc, cmd, ccb)) { device_printf(sc->mrsas_dev, "Build NON-RW LDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } break; case READ_WRITE_SYSPDIO: case NON_READ_WRITE_SYSPDIO: if (sc->secure_jbod_support && (cmd_type == NON_READ_WRITE_SYSPDIO)) { /* Build NON-RW IO for JBOD */ if (mrsas_build_syspdio(sc, cmd, ccb, sim, 0)) { device_printf(sc->mrsas_dev, "Build SYSPDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } } else { /* Build RW IO for JBOD */ if (mrsas_build_syspdio(sc, cmd, ccb, sim, 1)) { device_printf(sc->mrsas_dev, "Build SYSPDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } } } mtx_unlock(&sc->raidmap_lock); if (cmd->flags == MRSAS_DIR_IN) /* from device */ cmd->io_request->Control |= MPI2_SCSIIO_CONTROL_READ; else if (cmd->flags == MRSAS_DIR_OUT) /* to device */ cmd->io_request->Control |= MPI2_SCSIIO_CONTROL_WRITE; cmd->io_request->SGLFlags = MPI2_SGE_FLAGS_64_BIT_ADDRESSING; 
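/* * Common frame setup for all IO types: SGLOffset0 below is expressed in * 32-bit words, the preallocated per-command sense buffer is attached, and * the request descriptor is stamped with this command's SMID so the * completion path can match it back. */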
cmd->io_request->SGLOffset0 = offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL) / 4; cmd->io_request->SenseBufferLowAddress = cmd->sense_phys_addr; cmd->io_request->SenseBufferLength = MRSAS_SCSI_SENSE_BUFFERSIZE; req_desc = cmd->request_desc; req_desc->SCSIIO.SMID = cmd->index; /* * Start timer for IO timeout. The timeout value used here is 180 seconds. */ cmd->callout_owner = true; #if (__FreeBSD_version >= 1000510) callout_reset_sbt(&cmd->cm_callout, SBT_1S * 180, 0, mrsas_scsiio_timeout, cmd, 0); #else callout_reset(&cmd->cm_callout, (180000 * hz) / 1000, mrsas_scsiio_timeout, cmd); #endif if (mrsas_atomic_read(&sc->fw_outstanding) > sc->io_cmds_highwater) sc->io_cmds_highwater++; /* * If this is a RAID 1/10 fast path write, the second command has already * been taken from the pool; prepare and fire both. FW has confirmed that * the LBA values of the two PDs backing a single R1/10 LD are always the * same. */ /* * driver side count always should be less than max_fw_cmds to get * new command */ if (cmd->r1_alt_dev_handle != MR_DEVHANDLE_INVALID) { mrsas_prepare_secondRaid1_IO(sc, cmd); mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); r1_cmd = cmd->peer_cmd; mrsas_fire_cmd(sc, r1_cmd->request_desc->addr.u.low, r1_cmd->request_desc->addr.u.high); } else { mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); } return (0); done: xpt_done(ccb); mrsas_atomic_dec(&sc->fw_outstanding); return (0); } /* * mrsas_find_io_type: Determines if IO is read/write or inquiry * input: pointer to CAM Control Block * * This function determines if the IO is read/write or inquiry. It returns a 1 * if the IO is read/write and 0 if it is inquiry. */ int mrsas_find_io_type(struct cam_sim *sim, union ccb *ccb) { struct ccb_scsiio *csio = &(ccb->csio); switch (csio->cdb_io.cdb_bytes[0]) { case READ_10: case WRITE_10: case READ_12: case WRITE_12: case READ_6: case WRITE_6: case READ_16: case WRITE_16: return (cam_sim_bus(sim) ? READ_WRITE_SYSPDIO : READ_WRITE_LDIO); default: return (cam_sim_bus(sim) ? NON_READ_WRITE_SYSPDIO : NON_READ_WRITE_LDIO); } } /* * mrsas_get_mpt_cmd: Get a cmd from free command pool * input: Adapter instance soft state * * This function removes an MPT command from the command free list and * initializes it. */ struct mrsas_mpt_cmd * mrsas_get_mpt_cmd(struct mrsas_softc *sc) { struct mrsas_mpt_cmd *cmd = NULL; mtx_lock(&sc->mpt_cmd_pool_lock); if (!TAILQ_EMPTY(&sc->mrsas_mpt_cmd_list_head)) { cmd = TAILQ_FIRST(&sc->mrsas_mpt_cmd_list_head); TAILQ_REMOVE(&sc->mrsas_mpt_cmd_list_head, cmd, next); } else { goto out; } memset((uint8_t *)cmd->io_request, 0, MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE); cmd->data = NULL; cmd->length = 0; cmd->flags = 0; cmd->error_code = 0; cmd->load_balance = 0; cmd->ccb_ptr = NULL; out: mtx_unlock(&sc->mpt_cmd_pool_lock); return cmd; } /* * mrsas_release_mpt_cmd: Return a cmd to free command pool * input: Command packet for return to free command pool * * This function returns an MPT command to the free command list.
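 * The RAID 1 bookkeeping fields (r1_alt_dev_handle, peer_cmd, cmd_completed) * are reset on release so that a recycled command never re-enters circulation * with a stale peer reference.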
*/ void mrsas_release_mpt_cmd(struct mrsas_mpt_cmd *cmd) { struct mrsas_softc *sc = cmd->sc; mtx_lock(&sc->mpt_cmd_pool_lock); cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID; cmd->sync_cmd_idx = (u_int32_t)MRSAS_ULONG_MAX; cmd->peer_cmd = NULL; cmd->cmd_completed = 0; memset((uint8_t *)cmd->io_request, 0, sizeof(MRSAS_RAID_SCSI_IO_REQUEST)); TAILQ_INSERT_HEAD(&(sc->mrsas_mpt_cmd_list_head), cmd, next); mtx_unlock(&sc->mpt_cmd_pool_lock); return; } /* * mrsas_get_request_desc: Get request descriptor from array * input: Adapter instance soft state * SMID index * * This function returns a pointer to the request descriptor. */ MRSAS_REQUEST_DESCRIPTOR_UNION * mrsas_get_request_desc(struct mrsas_softc *sc, u_int16_t index) { u_int8_t *p; KASSERT(index < sc->max_fw_cmds, ("req_desc is out of range")); p = sc->req_desc + sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION) * index; return (MRSAS_REQUEST_DESCRIPTOR_UNION *) p; } /* mrsas_prepare_secondRaid1_IO * It prepares the raid 1 second IO */ void mrsas_prepare_secondRaid1_IO(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd) { MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc, *req_desc2 = NULL; struct mrsas_mpt_cmd *r1_cmd; r1_cmd = cmd->peer_cmd; req_desc = cmd->request_desc; /* * copy the io request frame as well as 8 SGEs data for r1 * command */ memcpy(r1_cmd->io_request, cmd->io_request, (sizeof(MRSAS_RAID_SCSI_IO_REQUEST))); memcpy(&r1_cmd->io_request->SGL, &cmd->io_request->SGL, (sc->max_sge_in_main_msg * sizeof(MPI2_SGE_IO_UNION))); /* sense buffer is different for r1 command */ r1_cmd->io_request->SenseBufferLowAddress = r1_cmd->sense_phys_addr; r1_cmd->ccb_ptr = cmd->ccb_ptr; req_desc2 = mrsas_get_request_desc(sc, r1_cmd->index - 1); req_desc2->addr.Words = 0; r1_cmd->request_desc = req_desc2; req_desc2->SCSIIO.SMID = r1_cmd->index; req_desc2->SCSIIO.RequestFlags = req_desc->SCSIIO.RequestFlags; r1_cmd->request_desc->SCSIIO.DevHandle = cmd->r1_alt_dev_handle; r1_cmd->r1_alt_dev_handle = cmd->io_request->DevHandle; r1_cmd->io_request->DevHandle = cmd->r1_alt_dev_handle; cmd->io_request->RaidContext.raid_context_g35.smid.peerSMID = r1_cmd->index; r1_cmd->io_request->RaidContext.raid_context_g35.smid.peerSMID = cmd->index; /* * MSIxIndex of both commands request descriptors * should be same */ r1_cmd->request_desc->SCSIIO.MSIxIndex = cmd->request_desc->SCSIIO.MSIxIndex; /* span arm is different for r1 cmd */ r1_cmd->io_request->RaidContext.raid_context_g35.spanArm = cmd->io_request->RaidContext.raid_context_g35.spanArm + 1; } /* * mrsas_build_ldio_rw: Builds an LDIO command * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function builds the LDIO command packet. It returns 0 if the command is * built successfully, otherwise it returns a 1. 
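 * On pre-Ventura controllers the SGE count is carried in two 8-bit RAID * context fields, effectively: * *	raid_context.numSGE    = sge_count & 0xFF; *	raid_context.numSGEExt = sge_count >> 8; * * Ventura/Aero controllers have a wide numSGE field and need no split.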
*/ int mrsas_build_ldio_rw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); struct ccb_scsiio *csio = &(ccb->csio); u_int32_t device_id; MRSAS_RAID_SCSI_IO_REQUEST *io_request; device_id = ccb_h->target_id; io_request = cmd->io_request; io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->RaidContext.raid_context.status = 0; io_request->RaidContext.raid_context.exStatus = 0; /* just the cdb len, other flags zero, and ORed-in later for FP */ io_request->IoFlags = csio->cdb_len; if (mrsas_setup_io(sc, cmd, ccb, device_id, io_request) != SUCCESS) device_printf(sc->mrsas_dev, "Build ldio or fpio error\n"); io_request->DataLength = cmd->length; if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) { if (cmd->sge_count > sc->max_num_sge) { device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds" "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge); return (FAIL); } if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.numSGE = cmd->sge_count; else { /* * numSGE stores the lower 8 bits of sge_count; numSGEExt stores * the upper 8 bits of sge_count */ io_request->RaidContext.raid_context.numSGE = cmd->sge_count; io_request->RaidContext.raid_context.numSGEExt = (uint8_t)(cmd->sge_count >> 8); } } else { device_printf(sc->mrsas_dev, "Data map/load failed.\n"); return (FAIL); } return (0); } /* stream detection on read and write IOs */ static void mrsas_stream_detect(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, struct IO_REQUEST_INFO *io_info) { u_int32_t device_id = io_info->ldTgtId; LD_STREAM_DETECT *current_ld_SD = sc->streamDetectByLD[device_id]; u_int32_t *track_stream = &current_ld_SD->mruBitMap; u_int32_t streamNum, shiftedValues, unshiftedValues; u_int32_t indexValueMask, shiftedValuesMask; int i; boolean_t isReadAhead = false; STREAM_DETECT *current_SD; /* find possible stream */ for (i = 0; i < MAX_STREAMS_TRACKED; ++i) { streamNum = (*track_stream >> (i * BITS_PER_INDEX_STREAM)) & STREAM_MASK; current_SD = &current_ld_SD->streamTrack[streamNum]; /* * if we found a stream, update the raid context and * also update the mruBitMap */ if (current_SD->nextSeqLBA && io_info->ldStartBlock >= current_SD->nextSeqLBA && (io_info->ldStartBlock <= (current_SD->nextSeqLBA+32)) && (current_SD->isRead == io_info->isRead)) { if (io_info->ldStartBlock != current_SD->nextSeqLBA && (!io_info->isRead || !isReadAhead)) { /* * Once the API is available we need to change this.
* At this point we are not allowing any gap */ continue; } cmd->io_request->RaidContext.raid_context_g35.streamDetected = TRUE; current_SD->nextSeqLBA = io_info->ldStartBlock + io_info->numBlocks; /* * update the mruBitMap LRU */ shiftedValuesMask = (1 << i * BITS_PER_INDEX_STREAM) - 1 ; shiftedValues = ((*track_stream & shiftedValuesMask) << BITS_PER_INDEX_STREAM); indexValueMask = STREAM_MASK << i * BITS_PER_INDEX_STREAM; unshiftedValues = (*track_stream) & (~(shiftedValuesMask | indexValueMask)); *track_stream = (unshiftedValues | shiftedValues | streamNum); return; } } /* * if we did not find any stream, create a new one from the least recently used */ streamNum = (*track_stream >> ((MAX_STREAMS_TRACKED - 1) * BITS_PER_INDEX_STREAM)) & STREAM_MASK; current_SD = &current_ld_SD->streamTrack[streamNum]; current_SD->isRead = io_info->isRead; current_SD->nextSeqLBA = io_info->ldStartBlock + io_info->numBlocks; *track_stream = (((*track_stream & ZERO_LAST_STREAM) << 4) | streamNum); return; } /* * mrsas_setup_io: Set up data including Fast Path I/O * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function sets up the IO request, including Fast Path I/O where * possible. It returns 0 if the command is set up successfully, otherwise it * returns a 1. */ int mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, u_int32_t device_id, MRSAS_RAID_SCSI_IO_REQUEST * io_request) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); struct ccb_scsiio *csio = &(ccb->csio); struct IO_REQUEST_INFO io_info; MR_DRV_RAID_MAP_ALL *map_ptr; struct mrsas_mpt_cmd *r1_cmd = NULL; MR_LD_RAID *raid; u_int8_t fp_possible; u_int32_t start_lba_hi, start_lba_lo, ld_block_size, ld; u_int32_t datalength = 0; io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; start_lba_lo = 0; start_lba_hi = 0; fp_possible = 0; /* * READ_6 (0x08) or WRITE_6 (0x0A) cdb */ if (csio->cdb_len == 6) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[4]; start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[1] << 16) | ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 8) | (u_int32_t)csio->cdb_io.cdb_bytes[3]; start_lba_lo &= 0x1FFFFF; } /* * READ_10 (0x28) or WRITE_10 (0x2A) cdb */ else if (csio->cdb_len == 10) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[8] | ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 8); start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) | (u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[5]); } /* * READ_12 (0xA8) or WRITE_12 (0xAA) cdb */ else if (csio->cdb_len == 12) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[6] << 24 | ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 16) | ((u_int32_t)csio->cdb_io.cdb_bytes[8] << 8) | ((u_int32_t)csio->cdb_io.cdb_bytes[9]); start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) | (u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[5]); } /* * READ_16 (0x88) or WRITE_16 (0x8A) cdb */ else if (csio->cdb_len == 16) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[10] << 24 | ((u_int32_t)csio->cdb_io.cdb_bytes[11] << 16) | ((u_int32_t)csio->cdb_io.cdb_bytes[12] << 8) | ((u_int32_t)csio->cdb_io.cdb_bytes[13]); start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[6] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 16) | (u_int32_t)csio->cdb_io.cdb_bytes[8] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[9]); start_lba_hi = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) |
(u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[5]); } memset(&io_info, 0, sizeof(struct IO_REQUEST_INFO)); io_info.ldStartBlock = ((u_int64_t)start_lba_hi << 32) | start_lba_lo; io_info.numBlocks = datalength; io_info.ldTgtId = device_id; io_info.r1_alt_dev_handle = MR_DEVHANDLE_INVALID; io_request->DataLength = cmd->length; switch (ccb_h->flags & CAM_DIR_MASK) { case CAM_DIR_IN: io_info.isRead = 1; break; case CAM_DIR_OUT: io_info.isRead = 0; break; case CAM_DIR_NONE: default: mrsas_dprint(sc, MRSAS_TRACE, "From %s : DMA Flag is %d \n", __func__, ccb_h->flags & CAM_DIR_MASK); break; } map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; ld_block_size = MR_LdBlockSizeGet(device_id, map_ptr); ld = MR_TargetIdToLdGet(device_id, map_ptr); if ((ld >= MAX_LOGICAL_DRIVES_EXT) || (!sc->fast_path_io)) { io_request->RaidContext.raid_context.regLockFlags = 0; fp_possible = 0; } else { if (MR_BuildRaidContext(sc, &io_info, &io_request->RaidContext.raid_context, map_ptr)) fp_possible = io_info.fpOkForIo; } raid = MR_LdRaidGet(ld, map_ptr); /* Store the TM capability value in cmd */ cmd->tmCapable = raid->capability.tmCapable; cmd->request_desc->SCSIIO.MSIxIndex = sc->msix_vectors ? smp_processor_id() % sc->msix_vectors : 0; if (sc->is_ventura || sc->is_aero) { if (sc->streamDetectByLD) { mtx_lock(&sc->stream_lock); mrsas_stream_detect(sc, cmd, &io_info); mtx_unlock(&sc->stream_lock); /* In ventura if stream detected for a read and * it is read ahead capable make this IO as LDIO */ if (io_request->RaidContext.raid_context_g35.streamDetected && io_info.isRead && io_info.raCapable) fp_possible = FALSE; } /* Set raid 1/10 fast path write capable bit in io_info. * Note - reset peer_cmd and r1_alt_dev_handle if fp_possible * disabled after this point. Try not to add more check for * fp_possible toggle after this. 
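 * For a qualifying write (valid r1_alt_dev_handle, RAID level 1, not a read), * a second MPT command is reserved from the pool and the two commands are * cross-linked through peer_cmd; mrsas_startio() later fires them back to * back.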
*/ if (fp_possible && (io_info.r1_alt_dev_handle != MR_DEVHANDLE_INVALID) && (raid->level == 1) && !io_info.isRead) { if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) { fp_possible = FALSE; mrsas_atomic_dec(&sc->fw_outstanding); } else { r1_cmd = mrsas_get_mpt_cmd(sc); if (!r1_cmd) { fp_possible = FALSE; mrsas_atomic_dec(&sc->fw_outstanding); } else { cmd->peer_cmd = r1_cmd; r1_cmd->peer_cmd = cmd; } } } } if (fp_possible) { mrsas_set_pd_lba(io_request, csio->cdb_len, &io_info, ccb, map_ptr, start_lba_lo, ld_block_size); io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); if (sc->mrsas_gen3_ctrl) { if (io_request->RaidContext.raid_context.regLockFlags == REGION_TYPE_UNUSED) cmd->request_desc->SCSIIO.RequestFlags = (MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context.nseg = 0x1; io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH; io_request->RaidContext.raid_context.regLockFlags |= (MR_RL_FLAGS_GRANT_DESTINATION_CUDA | MR_RL_FLAGS_SEQ_NUM_ENABLE); } else if (sc->is_ventura || sc->is_aero) { io_request->RaidContext.raid_context_g35.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context_g35.nseg = 0x1; io_request->RaidContext.raid_context_g35.routingFlags.bits.sqn = 1; io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH; if (io_request->RaidContext.raid_context_g35.routingFlags.bits.sld) { io_request->RaidContext.raid_context_g35.RAIDFlags = (MR_RAID_FLAGS_IO_SUB_TYPE_CACHE_BYPASS << MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT); } } if ((sc->load_balance_info[device_id].loadBalanceFlag) && (io_info.isRead)) { io_info.devHandle = mrsas_get_updated_dev_handle(sc, &sc->load_balance_info[device_id], &io_info); cmd->load_balance = MRSAS_LOAD_BALANCE_FLAG; cmd->pd_r1_lb = io_info.pd_after_lb; if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.spanArm = io_info.span_arm; else io_request->RaidContext.raid_context.spanArm = io_info.span_arm; } else cmd->load_balance = 0; if (sc->is_ventura || sc->is_aero) cmd->r1_alt_dev_handle = io_info.r1_alt_dev_handle; else cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID; cmd->request_desc->SCSIIO.DevHandle = io_info.devHandle; io_request->DevHandle = io_info.devHandle; cmd->pdInterface = io_info.pdInterface; } else { /* Not FP IO */ io_request->RaidContext.raid_context.timeoutValue = map_ptr->raidMap.fpPdIoTimeoutSec; cmd->request_desc->SCSIIO.RequestFlags = (MRSAS_REQ_DESCRIPT_FLAGS_LD_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); if (sc->mrsas_gen3_ctrl) { if (io_request->RaidContext.raid_context.regLockFlags == REGION_TYPE_UNUSED) cmd->request_desc->SCSIIO.RequestFlags = (MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context.regLockFlags |= (MR_RL_FLAGS_GRANT_DESTINATION_CPU0 | MR_RL_FLAGS_SEQ_NUM_ENABLE); io_request->RaidContext.raid_context.nseg = 0x1; } else if (sc->is_ventura || sc->is_aero) { io_request->RaidContext.raid_context_g35.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context_g35.routingFlags.bits.sqn = 1; io_request->RaidContext.raid_context_g35.nseg = 0x1; } io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST; io_request->DevHandle = device_id; } return (0); } /* *
mrsas_build_ldio_nonrw: Builds an LDIO command * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function builds the LDIO command packet. It returns 0 if the command is * built successfully, otherwise it returns a 1. */ int mrsas_build_ldio_nonrw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); u_int32_t device_id, ld; MR_DRV_RAID_MAP_ALL *map_ptr; MR_LD_RAID *raid; RAID_CONTEXT *pRAID_Context; MRSAS_RAID_SCSI_IO_REQUEST *io_request; io_request = cmd->io_request; device_id = ccb_h->target_id; map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; ld = MR_TargetIdToLdGet(device_id, map_ptr); raid = MR_LdRaidGet(ld, map_ptr); /* get RAID_Context pointer */ pRAID_Context = &io_request->RaidContext.raid_context; /* Store the TM capability value in cmd */ cmd->tmCapable = raid->capability.tmCapable; /* FW path for LD Non-RW (SCSI management commands) */ io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST; io_request->DevHandle = device_id; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->LUN[1] = ccb_h->target_lun & 0xF; io_request->DataLength = cmd->length; if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) { if (cmd->sge_count > sc->max_num_sge) { device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds" "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge); return (1); } if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.numSGE = cmd->sge_count; else { /* * numSGE stores the lower 8 bits of sge_count; numSGEExt stores * the upper 8 bits of sge_count */ io_request->RaidContext.raid_context.numSGE = cmd->sge_count; io_request->RaidContext.raid_context.numSGEExt = (uint8_t)(cmd->sge_count >> 8); } } else { device_printf(sc->mrsas_dev, "Data map/load failed.\n"); return (1); } return (0); } /* * mrsas_build_syspdio: Builds a DCDB command * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function builds the DCDB inquiry command. It returns 0 if the command * is built successfully, otherwise it returns a 1.
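 * The fp_possible argument selects the route: 0 sends the command via the * firmware queue, 1 uses the fast path with a device handle taken from the * JBOD sequence map or the RAID map below.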
*/ int mrsas_build_syspdio(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, struct cam_sim *sim, u_int8_t fp_possible) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); u_int32_t device_id; MR_DRV_RAID_MAP_ALL *local_map_ptr; MRSAS_RAID_SCSI_IO_REQUEST *io_request; RAID_CONTEXT *pRAID_Context; struct MR_PD_CFG_SEQ_NUM_SYNC *pd_sync; io_request = cmd->io_request; /* get RAID_Context pointer */ pRAID_Context = &io_request->RaidContext.raid_context; device_id = ccb_h->target_id; local_map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; io_request->RaidContext.raid_context.RAIDFlags = MR_RAID_FLAGS_IO_SUB_TYPE_SYSTEM_PD << MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT; io_request->RaidContext.raid_context.regLockFlags = 0; io_request->RaidContext.raid_context.regLockRowLBA = 0; io_request->RaidContext.raid_context.regLockLength = 0; cmd->pdInterface = sc->target_list[device_id].interface_type; /* If FW supports PD sequence number */ if (sc->use_seqnum_jbod_fp && sc->pd_list[device_id].driveType == 0x00) { //printf("Using Drv seq num\n"); pd_sync = (void *)sc->jbodmap_mem[(sc->pd_seq_map_id - 1) & 1]; cmd->tmCapable = pd_sync->seq[device_id].capability.tmCapable; /* More than 256 PD/JBOD support for Ventura */ if (sc->support_morethan256jbod) io_request->RaidContext.raid_context.VirtualDiskTgtId = pd_sync->seq[device_id].pdTargetId; else io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id + 255; io_request->RaidContext.raid_context.configSeqNum = pd_sync->seq[device_id].seqNum; io_request->DevHandle = pd_sync->seq[device_id].devHandle; if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.routingFlags.bits.sqn = 1; else io_request->RaidContext.raid_context.regLockFlags |= (MR_RL_FLAGS_SEQ_NUM_ENABLE | MR_RL_FLAGS_GRANT_DESTINATION_CUDA); /* raid_context.Type = MPI2_TYPE_CUDA is valid only, * if FW support Jbod Sequence number */ io_request->RaidContext.raid_context.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context.nseg = 0x1; } else if (sc->fast_path_io) { //printf("Using LD RAID map\n"); io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->RaidContext.raid_context.configSeqNum = 0; local_map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; io_request->DevHandle = local_map_ptr->raidMap.devHndlInfo[device_id].curDevHdl; } else { //printf("Using FW PATH\n"); /* Want to send all IO via FW path */ io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->RaidContext.raid_context.configSeqNum = 0; io_request->DevHandle = MR_DEVHANDLE_INVALID; } cmd->request_desc->SCSIIO.DevHandle = io_request->DevHandle; cmd->request_desc->SCSIIO.MSIxIndex = sc->msix_vectors ? 
smp_processor_id() % sc->msix_vectors : 0; if (!fp_possible) { /* system pd firmware path */ io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.timeoutValue = local_map_ptr->raidMap.fpPdIoTimeoutSec; io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; } else { /* system pd fast path */ io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST; io_request->RaidContext.raid_context.timeoutValue = local_map_ptr->raidMap.fpPdIoTimeoutSec; /* * NOTE - For system pd RW cmds only IoFlags will be FAST_PATH * Because the NON RW cmds will now go via FW Queue * and not the Exception queue */ if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); } io_request->LUN[1] = ccb_h->target_lun & 0xF; io_request->DataLength = cmd->length; if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) { if (cmd->sge_count > sc->max_num_sge) { device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds " "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge); return (1); } if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.numSGE = cmd->sge_count; else { /* * numSGE stores the lower 8 bits of sge_count; numSGEExt stores * the upper 8 bits of sge_count */ io_request->RaidContext.raid_context.numSGE = cmd->sge_count; io_request->RaidContext.raid_context.numSGEExt = (uint8_t)(cmd->sge_count >> 8); } } else { device_printf(sc->mrsas_dev, "Data map/load failed.\n"); return (1); } return (0); } /* * mrsas_is_prp_possible: This function will tell whether PRPs should be built or not * sc: Adapter instance soft state * cmd: MPT command frame pointer * nsegs: Number of OS SGEs * * This function checks whether the IO is qualified to build PRPs * return: true: if PRP should be built * false: if IEEE SGLs should be built */ static boolean_t mrsas_is_prp_possible(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nsegs) { struct mrsas_softc *sc = cmd->sc; int i; u_int32_t data_length = 0; bool build_prp = false; u_int32_t mr_nvme_pg_size; mr_nvme_pg_size = max(sc->nvme_page_size, MR_DEFAULT_NVME_PAGE_SIZE); data_length = cmd->length; if (data_length > (mr_nvme_pg_size * 5)) build_prp = true; else if ((data_length > (mr_nvme_pg_size * 4)) && (data_length <= (mr_nvme_pg_size * 5))) { /* check if 1st SG entry size is < residual beyond 4 pages */ if ((segs[0].ds_len) < (data_length - (mr_nvme_pg_size * 4))) build_prp = true; } /* check for SGE holes here */ for (i = 0; i < nsegs; i++) { /* check for mid SGEs */ if ((i != 0) && (i != (nsegs - 1))) { if ((segs[i].ds_addr % mr_nvme_pg_size) || (segs[i].ds_len % mr_nvme_pg_size)) { build_prp = false; mrsas_atomic_inc(&sc->sge_holes); break; } } /* check for first SGE */ if ((nsegs > 1) && (i == 0)) { if ((segs[i].ds_addr + segs[i].ds_len) % mr_nvme_pg_size) { build_prp = false; mrsas_atomic_inc(&sc->sge_holes); break; } } /* check for last SGE */ if ((nsegs > 1) && (i == (nsegs - 1))) { if (segs[i].ds_addr % mr_nvme_pg_size) { build_prp = false; mrsas_atomic_inc(&sc->sge_holes); break; } } } return build_prp; } /* * mrsas_map_request: Map and load data * input: Adapter instance soft state * Pointer to command packet * * For data from OS, map and load the data buffer into bus space. The SG list * is built in the callback.
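 *
 * (A hedged note on the two load paths in the function body: newer
 * FreeBSD uses bus_dmamap_load_ccb(), older releases plain
 * bus_dmamap_load(); either may return EINPROGRESS when busdma defers
 * the mapping, in which case the SIM queue is frozen until the
 * deferred callback runs.)
 *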
If the bus dmamap load is not successful, * cmd->error_code will contain the error code and 1 is returned. */ int mrsas_map_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb) { u_int32_t retcode = 0; struct cam_sim *sim; sim = xpt_path_sim(cmd->ccb_ptr->ccb_h.path); if (cmd->data != NULL) { /* Map data buffer into bus space */ mtx_lock(&sc->io_lock); #if (__FreeBSD_version >= 902001) retcode = bus_dmamap_load_ccb(sc->data_tag, cmd->data_dmamap, ccb, mrsas_data_load_cb, cmd, 0); #else retcode = bus_dmamap_load(sc->data_tag, cmd->data_dmamap, cmd->data, cmd->length, mrsas_data_load_cb, cmd, BUS_DMA_NOWAIT); #endif mtx_unlock(&sc->io_lock); if (retcode) device_printf(sc->mrsas_dev, "bus_dmamap_load(): retcode = %d\n", retcode); if (retcode == EINPROGRESS) { device_printf(sc->mrsas_dev, "request load in progress\n"); mrsas_freeze_simq(cmd, sim); } } if (cmd->error_code) return (1); return (retcode); } /* * mrsas_unmap_request: Unmap and unload data * input: Adapter instance soft state * Pointer to command packet * * This function unmaps and unloads data from OS. */ void mrsas_unmap_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd) { if (cmd->data != NULL) { if (cmd->flags & MRSAS_DIR_IN) bus_dmamap_sync(sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_POSTREAD); if (cmd->flags & MRSAS_DIR_OUT) bus_dmamap_sync(sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_POSTWRITE); mtx_lock(&sc->io_lock); bus_dmamap_unload(sc->data_tag, cmd->data_dmamap); mtx_unlock(&sc->io_lock); } } /** * mrsas_build_ieee_sgl - Prepare IEEE SGLs * @sc: Adapter soft state * @segs: OS SGEs pointers * @nseg: Number of OS SGEs * @cmd: Fusion command frame * return: void */ static void mrsas_build_ieee_sgl(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg) { struct mrsas_softc *sc = cmd->sc; MRSAS_RAID_SCSI_IO_REQUEST *io_request; pMpi25IeeeSgeChain64_t sgl_ptr; int i = 0, sg_processed = 0; io_request = cmd->io_request; sgl_ptr = (pMpi25IeeeSgeChain64_t)&io_request->SGL; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) { pMpi25IeeeSgeChain64_t sgl_ptr_end = sgl_ptr; sgl_ptr_end += sc->max_sge_in_main_msg - 1; sgl_ptr_end->Flags = 0; } if (nseg != 0) { for (i = 0; i < nseg; i++) { sgl_ptr->Address = segs[i].ds_addr; sgl_ptr->Length = segs[i].ds_len; sgl_ptr->Flags = 0; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) { if (i == nseg - 1) sgl_ptr->Flags = IEEE_SGE_FLAGS_END_OF_LIST; } sgl_ptr++; sg_processed = i + 1; if ((sg_processed == (sc->max_sge_in_main_msg - 1)) && (nseg > sc->max_sge_in_main_msg)) { pMpi25IeeeSgeChain64_t sg_chain; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) { if ((cmd->io_request->IoFlags & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) != MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) cmd->io_request->ChainOffset = sc->chain_offset_io_request; else cmd->io_request->ChainOffset = 0; } else cmd->io_request->ChainOffset = sc->chain_offset_io_request; sg_chain = sgl_ptr; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) sg_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT; else sg_chain->Flags = (IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR); sg_chain->Length = (sizeof(MPI2_SGE_IO_UNION) * (nseg - sg_processed)); sg_chain->Address = cmd->chain_frame_phys_addr; sgl_ptr = (pMpi25IeeeSgeChain64_t)cmd->chain_frame; } } } } /** * mrsas_build_prp_nvme - Prepare PRPs (Physical Region Page) SGLs, specific to NVMe drives only * @sc: Adapter soft state * @segs: OS SGEs pointers * @nseg: Number of OS SGEs * @cmd: Fusion command frame *
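 * Worked example of the first-PRP arithmetic in the body (hedged,
 * assuming a 4KiB NVMe page): for sge_addr = 0x10234,
 *	offset        = sge_addr & (0x1000 - 1) = 0x234
 *	first_prp_len = 0x1000 - 0x234          = 0xDCC bytes
 * and every subsequent PRP entry covers one full, page-aligned page.
 *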
return: void */ static void mrsas_build_prp_nvme(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg) { struct mrsas_softc *sc = cmd->sc; int sge_len, offset, num_prp_in_chain = 0; pMpi25IeeeSgeChain64_t main_chain_element, ptr_first_sgl, sgl_ptr; u_int64_t *ptr_sgl; bus_addr_t ptr_sgl_phys; u_int64_t sge_addr; u_int32_t page_mask, page_mask_result, i = 0; u_int32_t first_prp_len; int data_len = cmd->length; u_int32_t mr_nvme_pg_size = max(sc->nvme_page_size, MR_DEFAULT_NVME_PAGE_SIZE); sgl_ptr = (pMpi25IeeeSgeChain64_t) &cmd->io_request->SGL; /* * NVMe has a very convoluted PRP format. One PRP is required * for each page or partial page. We need to split up OS SG * entries if they are longer than one page or cross a page * boundary. We also have to insert a PRP list pointer entry as * the last entry in each physical page of the PRP list. * * NOTE: The first PRP "entry" is actually placed in the first * SGL entry in the main message in IEEE 64 format. The 2nd * entry in the main message is the chain element, and the rest * of the PRP entries are built in the contiguous PCIe buffer. */ page_mask = mr_nvme_pg_size - 1; ptr_sgl = (u_int64_t *) cmd->chain_frame; ptr_sgl_phys = cmd->chain_frame_phys_addr; memset(ptr_sgl, 0, sc->max_chain_frame_sz); /* Build chain frame element which holds all PRPs except the first */ main_chain_element = (pMpi25IeeeSgeChain64_t) ((u_int8_t *)sgl_ptr + sizeof(MPI25_IEEE_SGE_CHAIN64)); main_chain_element->Address = cmd->chain_frame_phys_addr; main_chain_element->NextChainOffset = 0; main_chain_element->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT | IEEE_SGE_FLAGS_SYSTEM_ADDR | MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP; /* Build first PRP; the SGE need not be PAGE aligned */ ptr_first_sgl = sgl_ptr; sge_addr = segs[i].ds_addr; sge_len = segs[i].ds_len; i++; offset = (u_int32_t) (sge_addr & page_mask); first_prp_len = mr_nvme_pg_size - offset; ptr_first_sgl->Address = sge_addr; ptr_first_sgl->Length = first_prp_len; data_len -= first_prp_len; if (sge_len > first_prp_len) { sge_addr += first_prp_len; sge_len -= first_prp_len; } else if (sge_len == first_prp_len) { sge_addr = segs[i].ds_addr; sge_len = segs[i].ds_len; i++; } for (;;) { offset = (u_int32_t) (sge_addr & page_mask); /* Put a PRP list pointer at the page boundary */ page_mask_result = (uintptr_t)(ptr_sgl + 1) & page_mask; if (!page_mask_result) { device_printf(sc->mrsas_dev, "BRCM: Put prp pointer as we are at page boundary" " ptr_sgl: 0x%p\n", ptr_sgl); ptr_sgl_phys++; *ptr_sgl = (uintptr_t)ptr_sgl_phys; ptr_sgl++; num_prp_in_chain++; } *ptr_sgl = sge_addr; ptr_sgl++; ptr_sgl_phys++; num_prp_in_chain++; sge_addr += mr_nvme_pg_size; sge_len -= mr_nvme_pg_size; data_len -= mr_nvme_pg_size; if (data_len <= 0) break; if (sge_len > 0) continue; sge_addr = segs[i].ds_addr; sge_len = segs[i].ds_len; i++; } main_chain_element->Length = num_prp_in_chain * sizeof(u_int64_t); mrsas_atomic_inc(&sc->prp_count); } /* * mrsas_data_load_cb: Callback entry point to build SGLs * input: Pointer to command packet as argument * Pointer to segment * Number of segments * Error * * This is the callback function of the bus dma map load.
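 * (For reference, a minimal sketch of the busdma callback contract
 * this relies on:
 *	static void cb(void *arg, bus_dma_segment_t *segs, int nseg,
 *	    int error);
 * busdma hands the callback the segment array it built; segs is only
 * valid for the duration of the call, so anything needed later must
 * be copied out before returning.)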
It builds the SG list. */ static void mrsas_data_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct mrsas_mpt_cmd *cmd = (struct mrsas_mpt_cmd *)arg; struct mrsas_softc *sc = cmd->sc; boolean_t build_prp = false; if (error) { cmd->error_code = error; device_printf(sc->mrsas_dev, "mrsas_data_load_cb_prp: error=%d\n", error); if (error == EFBIG) { cmd->ccb_ptr->ccb_h.status = CAM_REQ_TOO_BIG; return; } } if (cmd->flags & MRSAS_DIR_IN) bus_dmamap_sync(cmd->sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_PREREAD); if (cmd->flags & MRSAS_DIR_OUT) bus_dmamap_sync(cmd->sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_PREWRITE); if (nseg > sc->max_num_sge) { device_printf(sc->mrsas_dev, "SGE count is too large or 0.\n"); return; } /* Check whether PRPs should be built or IEEE SGLs */ if ((cmd->io_request->IoFlags & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) && (cmd->pdInterface == NVME_PD)) build_prp = mrsas_is_prp_possible(cmd, segs, nseg); if (build_prp == true) mrsas_build_prp_nvme(cmd, segs, nseg); else mrsas_build_ieee_sgl(cmd, segs, nseg); cmd->sge_count = nseg; } /* * mrsas_freeze_simq: Freeze SIM queue * input: Pointer to command packet * Pointer to SIM * * This function freezes the sim queue. */ static void mrsas_freeze_simq(struct mrsas_mpt_cmd *cmd, struct cam_sim *sim) { union ccb *ccb = (union ccb *)(cmd->ccb_ptr); xpt_freeze_simq(sim, 1); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; ccb->ccb_h.status |= CAM_REQUEUE_REQ; } void mrsas_xpt_freeze(struct mrsas_softc *sc) { xpt_freeze_simq(sc->sim_0, 1); xpt_freeze_simq(sc->sim_1, 1); } void mrsas_xpt_release(struct mrsas_softc *sc) { xpt_release_simq(sc->sim_0, 1); xpt_release_simq(sc->sim_1, 1); } /* * mrsas_cmd_done: Perform remaining command completion * input: Adapter instance soft state * Pointer to command packet * * This function calls unmap request and releases the MPT command. */ void mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd) { mrsas_unmap_request(sc, cmd); mtx_lock(&sc->sim_lock); if (cmd->callout_owner) { callout_stop(&cmd->cm_callout); cmd->callout_owner = false; } xpt_done(cmd->ccb_ptr); cmd->ccb_ptr = NULL; mtx_unlock(&sc->sim_lock); mrsas_release_mpt_cmd(cmd); } /* * mrsas_cam_poll: Polling entry point * input: Pointer to SIM * * This polls and drains the completion queue of every MSI-x vector. */ static void mrsas_cam_poll(struct cam_sim *sim) { int i; struct mrsas_softc *sc = (struct mrsas_softc *)cam_sim_softc(sim); if (sc->msix_vectors != 0) { for (i = 0; i < sc->msix_vectors; i++) { mrsas_complete_cmd(sc, i); } } else { mrsas_complete_cmd(sc, 0); } } /* * mrsas_bus_scan: Perform bus scan * input: Adapter instance soft state * * This mrsas_bus_scan function is needed for FreeBSD 7.x. Also, it should not * be called in FreeBSD 8.x and later versions, where the bus scan is * automatic.
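 *
 * The rescan pattern the function uses, sketched with the same CAM
 * calls it makes below: allocate a CCB, build a wildcard path for the
 * SIM, and hand the CCB to xpt_rescan(), which takes ownership of it:
 *	ccb = xpt_alloc_ccb();
 *	xpt_create_path(&ccb->ccb_h.path, xpt_periph, cam_sim_path(sim),
 *	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
 *	xpt_rescan(ccb);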
*/ int mrsas_bus_scan(struct mrsas_softc *sc) { union ccb *ccb_0; union ccb *ccb_1; if ((ccb_0 = xpt_alloc_ccb()) == NULL) { return (ENOMEM); } if ((ccb_1 = xpt_alloc_ccb()) == NULL) { xpt_free_ccb(ccb_0); return (ENOMEM); } mtx_lock(&sc->sim_lock); if (xpt_create_path(&ccb_0->ccb_h.path, xpt_periph, cam_sim_path(sc->sim_0), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb_0); xpt_free_ccb(ccb_1); mtx_unlock(&sc->sim_lock); return (EIO); } if (xpt_create_path(&ccb_1->ccb_h.path, xpt_periph, cam_sim_path(sc->sim_1), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb_0); xpt_free_ccb(ccb_1); mtx_unlock(&sc->sim_lock); return (EIO); } mtx_unlock(&sc->sim_lock); xpt_rescan(ccb_0); xpt_rescan(ccb_1); return (0); } /* * mrsas_bus_scan_sim: Perform bus scan per SIM * input: adapter instance soft state * * This function will be called from Event handler on LD creation/deletion, * JBOD on/off. */ int mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim) { union ccb *ccb; if ((ccb = xpt_alloc_ccb()) == NULL) { return (ENOMEM); } mtx_lock(&sc->sim_lock); if (xpt_create_path(&ccb->ccb_h.path, xpt_periph, cam_sim_path(sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); mtx_unlock(&sc->sim_lock); return (EIO); } mtx_unlock(&sc->sim_lock); xpt_rescan(ccb); return (0); } /* * mrsas_track_scsiio: Track IOs for a given target in the mpt_cmd_list * input: Adapter instance soft state * Target ID of target * Bus ID of the target * * This function checks for any pending IO in the whole mpt_cmd_list pool * with the bus_id and target_id passed in arguments. If some IO is found * that means target reset is not successfully completed. * * Returns FAIL if IOs pending to the target device, else return SUCCESS */ static int mrsas_track_scsiio(struct mrsas_softc *sc, target_id_t tgt_id, u_int32_t bus_id) { int i; struct mrsas_mpt_cmd *mpt_cmd = NULL; for (i = 0 ; i < sc->max_fw_cmds; i++) { mpt_cmd = sc->mpt_cmd_list[i]; - /* - * Check if the target_id and bus_id is same as the timeout IO - */ - if (mpt_cmd->ccb_ptr) { - /* bus_id = 1 denotes a VD */ - if (bus_id == 1) - tgt_id = (mpt_cmd->ccb_ptr->ccb_h.target_id - (MRSAS_MAX_PD - 1)); + /* + * Check if the target_id and bus_id is same as the timeout IO + */ + if (mpt_cmd->ccb_ptr) { + /* bus_id = 1 denotes a VD */ + if (bus_id == 1) + tgt_id = + (mpt_cmd->ccb_ptr->ccb_h.target_id - (MRSAS_MAX_PD - 1)); if (mpt_cmd->ccb_ptr->cpi.bus_id == bus_id && mpt_cmd->ccb_ptr->ccb_h.target_id == tgt_id) { device_printf(sc->mrsas_dev, "IO commands pending to target id %d\n", tgt_id); return FAIL; } } } return SUCCESS; } #if TM_DEBUG /* * mrsas_tm_response_code: Prints TM response code received from FW * input: Adapter instance soft state * MPI reply returned from firmware * * Returns nothing. 
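 *
 * (Aside on the bus/target mapping checked in mrsas_track_scsiio
 * above: on bus 1 a CAM target id is offset by MRSAS_MAX_PD - 1, so
 * assuming MRSAS_MAX_PD is 256, CAM target 260 on bus 1 corresponds
 * to VD 260 - 255 = 5.)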
*/ static void mrsas_tm_response_code(struct mrsas_softc *sc, MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply) { char *desc; switch (mpi_reply->ResponseCode) { case MPI2_SCSITASKMGMT_RSP_TM_COMPLETE: desc = "task management request completed"; break; case MPI2_SCSITASKMGMT_RSP_INVALID_FRAME: desc = "invalid frame"; break; case MPI2_SCSITASKMGMT_RSP_TM_NOT_SUPPORTED: desc = "task management request not supported"; break; case MPI2_SCSITASKMGMT_RSP_TM_FAILED: desc = "task management request failed"; break; case MPI2_SCSITASKMGMT_RSP_TM_SUCCEEDED: desc = "task management request succeeded"; break; case MPI2_SCSITASKMGMT_RSP_TM_INVALID_LUN: desc = "invalid lun"; break; case 0xA: desc = "overlapped tag attempted"; break; case MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC: desc = "task queued, however not sent to target"; break; default: desc = "unknown"; break; } device_printf(sc->mrsas_dev, "response_code(%01x): %s\n", mpi_reply->ResponseCode, desc); device_printf(sc->mrsas_dev, "TerminationCount/DevHandle/Function/TaskType/IOCStat/IOCLoginfo\n" "0x%x/0x%x/0x%x/0x%x/0x%x/0x%x\n", mpi_reply->TerminationCount, mpi_reply->DevHandle, mpi_reply->Function, mpi_reply->TaskType, mpi_reply->IOCStatus, mpi_reply->IOCLogInfo); } #endif /* * mrsas_issue_tm: Fires the TM command to FW and waits for completion * input: Adapter instance soft state * request descriptor compiled by mrsas_reset_targets * * Returns FAIL if the TM command TIMEDOUT from FW, else SUCCESS. */ static int mrsas_issue_tm(struct mrsas_softc *sc, MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc) { int sleep_stat; mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); sleep_stat = msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "tm_sleep", 50*hz); if (sleep_stat == EWOULDBLOCK) { device_printf(sc->mrsas_dev, "tm cmd TIMEDOUT\n"); return FAIL; } return SUCCESS; } /* * mrsas_reset_targets: Gathers info to fire a target reset command * input: Adapter instance soft state * * This function compiles data for a target reset command to be fired to the FW * and then traverses the target_reset_pool to find targets with TIMEDOUT IOs.
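 * (mrsas_issue_tm above uses the stock msleep()/wakeup() timeout
 * idiom; 50 * hz bounds the sleep at 50 seconds, and EWOULDBLOCK on
 * return means FW never woke the channel, i.e. the TM frame timed
 * out -- a sketch mirroring the code:
 *	if (msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "tm_sleep",
 *	    50 * hz) == EWOULDBLOCK)
 *		return FAIL;
 * )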
* * Returns SUCCESS or FAIL */ int mrsas_reset_targets(struct mrsas_softc *sc) { struct mrsas_mpt_cmd *tm_mpt_cmd = NULL; struct mrsas_mpt_cmd *tgt_mpt_cmd = NULL; MR_TASK_MANAGE_REQUEST *mr_request; MPI2_SCSI_TASK_MANAGE_REQUEST *tm_mpi_request; MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc; int retCode = FAIL, count, i, outstanding; u_int32_t MSIxIndex, bus_id; target_id_t tgt_id; #if TM_DEBUG MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply; #endif outstanding = mrsas_atomic_read(&sc->fw_outstanding); if (!outstanding) { device_printf(sc->mrsas_dev, "NO IOs pending...\n"); mrsas_atomic_set(&sc->target_reset_outstanding, 0); retCode = SUCCESS; goto return_status; } else if (sc->adprecovery != MRSAS_HBA_OPERATIONAL) { device_printf(sc->mrsas_dev, "Controller is not operational\n"); goto return_status; } else { /* Some more error checks will be added in future */ } /* Get an mpt frame and an index to fire the TM cmd */ tm_mpt_cmd = mrsas_get_mpt_cmd(sc); if (!tm_mpt_cmd) { retCode = FAIL; goto return_status; } req_desc = mrsas_get_request_desc(sc, (tm_mpt_cmd->index) - 1); if (!req_desc) { device_printf(sc->mrsas_dev, "Cannot get request_descriptor for tm.\n"); retCode = FAIL; goto release_mpt; } memset(req_desc, 0, sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION)); req_desc->HighPriority.SMID = tm_mpt_cmd->index; req_desc->HighPriority.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); req_desc->HighPriority.MSIxIndex = 0; req_desc->HighPriority.LMID = 0; req_desc->HighPriority.Reserved1 = 0; tm_mpt_cmd->request_desc = req_desc; mr_request = (MR_TASK_MANAGE_REQUEST *) tm_mpt_cmd->io_request; memset(mr_request, 0, sizeof(MR_TASK_MANAGE_REQUEST)); tm_mpi_request = (MPI2_SCSI_TASK_MANAGE_REQUEST *) &mr_request->TmRequest; tm_mpi_request->Function = MPI2_FUNCTION_SCSI_TASK_MGMT; tm_mpi_request->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET; tm_mpi_request->TaskMID = 0; /* smid task */ tm_mpi_request->LUN[1] = 0; /* Traverse the tm_mpt pool to get valid entries */ for (i = 0 ; i < MRSAS_MAX_TM_TARGETS; i++) { if(!sc->target_reset_pool[i]) { continue; } else { tgt_mpt_cmd = sc->target_reset_pool[i]; } tgt_id = i; /* See if the target is tm capable or NOT */ if (!tgt_mpt_cmd->tmCapable) { device_printf(sc->mrsas_dev, "Task management NOT SUPPORTED for " "CAM target:%d\n", tgt_id); retCode = FAIL; goto release_mpt; } tm_mpi_request->DevHandle = tgt_mpt_cmd->io_request->DevHandle; if (i < (MRSAS_MAX_PD - 1)) { mr_request->uTmReqReply.tmReqFlags.isTMForPD = 1; bus_id = 0; } else { mr_request->uTmReqReply.tmReqFlags.isTMForLD = 1; bus_id = 1; } device_printf(sc->mrsas_dev, "TM will be fired for " "CAM target:%d and bus_id %d\n", tgt_id, bus_id); sc->ocr_chan = (void *)&tm_mpt_cmd; retCode = mrsas_issue_tm(sc, req_desc); if (retCode == FAIL) goto release_mpt; #if TM_DEBUG mpi_reply = (MPI2_SCSI_TASK_MANAGE_REPLY *) &mr_request->uTmReqReply.TMReply; mrsas_tm_response_code(sc, mpi_reply); #endif mrsas_atomic_dec(&sc->target_reset_outstanding); sc->target_reset_pool[i] = NULL; /* Check for pending cmds in the mpt_cmd_pool with the tgt_id */ mrsas_disable_intr(sc); /* Wait for 1 second to complete parallel ISR calling same * mrsas_complete_cmd() */ msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "mrsas_reset_wakeup", 1 * hz); count = sc->msix_vectors > 0 ? 
sc->msix_vectors : 1; mtx_unlock(&sc->sim_lock); for (MSIxIndex = 0; MSIxIndex < count; MSIxIndex++) mrsas_complete_cmd(sc, MSIxIndex); mtx_lock(&sc->sim_lock); retCode = mrsas_track_scsiio(sc, tgt_id, bus_id); mrsas_enable_intr(sc); if (retCode == FAIL) goto release_mpt; } device_printf(sc->mrsas_dev, "Number of targets outstanding " "after reset: %d\n", mrsas_atomic_read(&sc->target_reset_outstanding)); release_mpt: mrsas_release_mpt_cmd(tm_mpt_cmd); return_status: device_printf(sc->mrsas_dev, "target reset %s!!\n", (retCode == SUCCESS) ? "SUCCESS" : "FAIL"); return retCode; } Index: projects/clang1000-import/sys/dev/msk/if_msk.c =================================================================== --- projects/clang1000-import/sys/dev/msk/if_msk.c (revision 357178) +++ projects/clang1000-import/sys/dev/msk/if_msk.c (revision 357179) @@ -1,4607 +1,4610 @@ /****************************************************************************** * * Name : sky2.c * Project: Gigabit Ethernet Driver for FreeBSD 5.x/6.x * Version: $Revision: 1.23 $ * Date : $Date: 2005/12/22 09:04:11 $ * Purpose: Main driver source file * *****************************************************************************/ /****************************************************************************** * * LICENSE: * Copyright (C) Marvell International Ltd. and/or its affiliates * * The computer program files contained in this folder ("Files") * are provided to you under the BSD-type license terms provided * below, and any use of such Files and any derivative works * thereof created by you shall be governed by the following terms * and conditions: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * - Neither the name of Marvell nor the names of its contributors * may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * /LICENSE * *****************************************************************************/ /*- * SPDX-License-Identifier: BSD-4-Clause AND BSD-3-Clause * * Copyright (c) 1997, 1998, 1999, 2000 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2003 Nathan L. Binkert * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * Device driver for the Marvell Yukon II Ethernet controller. * Due to lack of documentation, this driver is based on the code from * sk(4) and Marvell's myk(4) driver for FreeBSD 5.x. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(msk, pci, 1, 1, 1); MODULE_DEPEND(msk, ether, 1, 1, 1); MODULE_DEPEND(msk, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* Tunables. */ static int msi_disable = 0; TUNABLE_INT("hw.msk.msi_disable", &msi_disable); static int legacy_intr = 0; TUNABLE_INT("hw.msk.legacy_intr", &legacy_intr); static int jumbo_disable = 0; TUNABLE_INT("hw.msk.jumbo_disable", &jumbo_disable); #define MSK_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) /* * Devices supported by this driver. 
*/ static const struct msk_product { uint16_t msk_vendorid; uint16_t msk_deviceid; const char *msk_name; } msk_products[] = { { VENDORID_SK, DEVICEID_SK_YUKON2, "SK-9Sxx Gigabit Ethernet" }, { VENDORID_SK, DEVICEID_SK_YUKON2_EXPR, "SK-9Exx Gigabit Ethernet"}, { VENDORID_MARVELL, DEVICEID_MRVL_8021CU, "Marvell Yukon 88E8021CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8021X, "Marvell Yukon 88E8021 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8022CU, "Marvell Yukon 88E8022CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8022X, "Marvell Yukon 88E8022 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8061CU, "Marvell Yukon 88E8061CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8061X, "Marvell Yukon 88E8061 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8062CU, "Marvell Yukon 88E8062CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8062X, "Marvell Yukon 88E8062 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8035, "Marvell Yukon 88E8035 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8036, "Marvell Yukon 88E8036 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8038, "Marvell Yukon 88E8038 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8039, "Marvell Yukon 88E8039 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8040, "Marvell Yukon 88E8040 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8040T, "Marvell Yukon 88E8040T Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8042, "Marvell Yukon 88E8042 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8048, "Marvell Yukon 88E8048 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4361, "Marvell Yukon 88E8050 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4360, "Marvell Yukon 88E8052 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4362, "Marvell Yukon 88E8053 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4363, "Marvell Yukon 88E8055 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4364, "Marvell Yukon 88E8056 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4365, "Marvell Yukon 88E8070 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436A, "Marvell Yukon 88E8058 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436B, "Marvell Yukon 88E8071 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436C, "Marvell Yukon 88E8072 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436D, "Marvell Yukon 88E8055 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4370, "Marvell Yukon 88E8075 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4380, "Marvell Yukon 88E8057 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4381, "Marvell Yukon 88E8059 Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE550SX, "D-Link 550SX Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE560SX, "D-Link 560SX Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE560T, "D-Link 560T Gigabit Ethernet" } }; static const char *model_name[] = { "Yukon XL", "Yukon EC Ultra", "Yukon EX", "Yukon EC", "Yukon FE", "Yukon FE+", "Yukon Supreme", "Yukon Ultra 2", "Yukon Unknown", "Yukon Optima", }; static int mskc_probe(device_t); static int mskc_attach(device_t); static int mskc_detach(device_t); static int mskc_shutdown(device_t); static int mskc_setup_rambuffer(struct msk_softc *); static int mskc_suspend(device_t); static int mskc_resume(device_t); static bus_dma_tag_t mskc_get_dma_tag(device_t, device_t); static void mskc_reset(struct msk_softc *); static int msk_probe(device_t); static int 
msk_attach(device_t); static int msk_detach(device_t); static void msk_tick(void *); static void msk_intr(void *); static void msk_intr_phy(struct msk_if_softc *); static void msk_intr_gmac(struct msk_if_softc *); static __inline void msk_rxput(struct msk_if_softc *); static int msk_handle_events(struct msk_softc *); static void msk_handle_hwerr(struct msk_if_softc *, uint32_t); static void msk_intr_hwerr(struct msk_softc *); #ifndef __NO_STRICT_ALIGNMENT static __inline void msk_fixup_rx(struct mbuf *); #endif static __inline void msk_rxcsum(struct msk_if_softc *, uint32_t, struct mbuf *); static void msk_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_jumbo_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_txeof(struct msk_if_softc *, int); static int msk_encap(struct msk_if_softc *, struct mbuf **); static void msk_start(struct ifnet *); static void msk_start_locked(struct ifnet *); static int msk_ioctl(struct ifnet *, u_long, caddr_t); static void msk_set_prefetch(struct msk_softc *, int, bus_addr_t, uint32_t); static void msk_set_rambuffer(struct msk_if_softc *); static void msk_set_tx_stfwd(struct msk_if_softc *); static void msk_init(void *); static void msk_init_locked(struct msk_if_softc *); static void msk_stop(struct msk_if_softc *); static void msk_watchdog(struct msk_if_softc *); static int msk_mediachange(struct ifnet *); static void msk_mediastatus(struct ifnet *, struct ifmediareq *); static void msk_phy_power(struct msk_softc *, int); static void msk_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int msk_status_dma_alloc(struct msk_softc *); static void msk_status_dma_free(struct msk_softc *); static int msk_txrx_dma_alloc(struct msk_if_softc *); static int msk_rx_dma_jalloc(struct msk_if_softc *); static void msk_txrx_dma_free(struct msk_if_softc *); static void msk_rx_dma_jfree(struct msk_if_softc *); static int msk_rx_fill(struct msk_if_softc *, int); static int msk_init_rx_ring(struct msk_if_softc *); static int msk_init_jumbo_rx_ring(struct msk_if_softc *); static void msk_init_tx_ring(struct msk_if_softc *); static __inline void msk_discard_rxbuf(struct msk_if_softc *, int); static __inline void msk_discard_jumbo_rxbuf(struct msk_if_softc *, int); static int msk_newbuf(struct msk_if_softc *, int); static int msk_jumbo_newbuf(struct msk_if_softc *, int); static int msk_phy_readreg(struct msk_if_softc *, int, int); static int msk_phy_writereg(struct msk_if_softc *, int, int, int); static int msk_miibus_readreg(device_t, int, int); static int msk_miibus_writereg(device_t, int, int, int); static void msk_miibus_statchg(device_t); static void msk_rxfilter(struct msk_if_softc *); static void msk_setvlan(struct msk_if_softc *, struct ifnet *); static void msk_stats_clear(struct msk_if_softc *); static void msk_stats_update(struct msk_if_softc *); static int msk_sysctl_stat32(SYSCTL_HANDLER_ARGS); static int msk_sysctl_stat64(SYSCTL_HANDLER_ARGS); static void msk_sysctl_node(struct msk_if_softc *); static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_msk_proc_limit(SYSCTL_HANDLER_ARGS); static device_method_t mskc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mskc_probe), DEVMETHOD(device_attach, mskc_attach), DEVMETHOD(device_detach, mskc_detach), DEVMETHOD(device_suspend, mskc_suspend), DEVMETHOD(device_resume, mskc_resume), DEVMETHOD(device_shutdown, mskc_shutdown), DEVMETHOD(bus_get_dma_tag, mskc_get_dma_tag), DEVMETHOD_END }; static driver_t mskc_driver = { "mskc", 
mskc_methods, sizeof(struct msk_softc) }; static devclass_t mskc_devclass; static device_method_t msk_methods[] = { /* Device interface */ DEVMETHOD(device_probe, msk_probe), DEVMETHOD(device_attach, msk_attach), DEVMETHOD(device_detach, msk_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, msk_miibus_readreg), DEVMETHOD(miibus_writereg, msk_miibus_writereg), DEVMETHOD(miibus_statchg, msk_miibus_statchg), DEVMETHOD_END }; static driver_t msk_driver = { "msk", msk_methods, sizeof(struct msk_if_softc) }; static devclass_t msk_devclass; DRIVER_MODULE(mskc, pci, mskc_driver, mskc_devclass, NULL, NULL); DRIVER_MODULE(msk, mskc, msk_driver, msk_devclass, NULL, NULL); DRIVER_MODULE(miibus, msk, miibus_driver, miibus_devclass, NULL, NULL); static struct resource_spec msk_res_spec_io[] = { { SYS_RES_IOPORT, PCIR_BAR(1), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec msk_res_spec_mem[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec msk_irq_spec_legacy[] = { { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0, 0 } }; static struct resource_spec msk_irq_spec_msi[] = { { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0, 0 } }; static int msk_miibus_readreg(device_t dev, int phy, int reg) { struct msk_if_softc *sc_if; sc_if = device_get_softc(dev); return (msk_phy_readreg(sc_if, phy, reg)); } static int msk_phy_readreg(struct msk_if_softc *sc_if, int phy, int reg) { struct msk_softc *sc; int i, val; sc = sc_if->msk_softc; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_CTRL, GM_SMI_CT_PHY_AD(phy) | GM_SMI_CT_REG_AD(reg) | GM_SMI_CT_OP_RD); for (i = 0; i < MSK_TIMEOUT; i++) { DELAY(1); val = GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_CTRL); if ((val & GM_SMI_CT_RD_VAL) != 0) { val = GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_DATA); break; } } if (i == MSK_TIMEOUT) { if_printf(sc_if->msk_ifp, "phy failed to come ready\n"); val = 0; } return (val); } static int msk_miibus_writereg(device_t dev, int phy, int reg, int val) { struct msk_if_softc *sc_if; sc_if = device_get_softc(dev); return (msk_phy_writereg(sc_if, phy, reg, val)); } static int msk_phy_writereg(struct msk_if_softc *sc_if, int phy, int reg, int val) { struct msk_softc *sc; int i; sc = sc_if->msk_softc; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_DATA, val); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_CTRL, GM_SMI_CT_PHY_AD(phy) | GM_SMI_CT_REG_AD(reg)); for (i = 0; i < MSK_TIMEOUT; i++) { DELAY(1); if ((GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_CTRL) & GM_SMI_CT_BUSY) == 0) break; } if (i == MSK_TIMEOUT) if_printf(sc_if->msk_ifp, "phy write timeout\n"); return (0); } static void msk_miibus_statchg(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; struct mii_data *mii; struct ifnet *ifp; uint32_t gmac; sc_if = device_get_softc(dev); sc = sc_if->msk_softc; MSK_IF_LOCK_ASSERT(sc_if); mii = device_get_softc(sc_if->msk_miibus); ifp = sc_if->msk_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc_if->msk_flags &= ~MSK_FLAG_LINK; if ((mii->mii_media_status & (IFM_AVALID | IFM_ACTIVE)) == (IFM_AVALID | IFM_ACTIVE)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc_if->msk_flags |= MSK_FLAG_LINK; break; case IFM_1000_T: case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: if ((sc_if->msk_flags & MSK_FLAG_FASTETHER) == 0) sc_if->msk_flags |= MSK_FLAG_LINK; break; default: break; } } if ((sc_if->msk_flags & MSK_FLAG_LINK) != 0) { /* Enable Tx FIFO Underrun. 
*/ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_MSK), GM_IS_TX_FF_UR | GM_IS_RX_FF_OR); /* * Because mii(4) notifies msk(4) when it detects a link status * change, there is no need to enable automatic * speed/flow-control/duplex updates. */ gmac = GM_GPCR_AU_ALL_DIS; switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_SX: case IFM_1000_T: gmac |= GM_GPCR_SPEED_1000; break; case IFM_100_TX: gmac |= GM_GPCR_SPEED_100; break; case IFM_10_T: break; } if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) == 0) gmac |= GM_GPCR_FC_RX_DIS; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) == 0) gmac |= GM_GPCR_FC_TX_DIS; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) gmac |= GM_GPCR_DUP_FULL; else gmac |= GM_GPCR_FC_RX_DIS | GM_GPCR_FC_TX_DIS; gmac |= GM_GPCR_RX_ENA | GM_GPCR_TX_ENA; GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, gmac); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); gmac = GMC_PAUSE_OFF; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) gmac = GMC_PAUSE_ON; } CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), gmac); /* Enable PHY interrupt for FIFO underrun/overflow. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, PHY_M_IS_FIFO_ERROR); } else { /* * Link state changed to down. * Disable PHY interrupts. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, 0); /* Disable Rx/Tx MAC. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); if ((gmac & (GM_GPCR_RX_ENA | GM_GPCR_TX_ENA)) != 0) { gmac &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, gmac); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); } } } static u_int msk_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { uint32_t *mchash = arg; uint32_t crc; crc = ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN); /* Just want the 6 least significant bits. */ crc &= 0x3f; /* Set the corresponding bit in the hash table.
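 * Worked example (hypothetical CRC): for crc = 0x2B after the 0x3f
 * mask, crc >> 5 = 1 and crc & 0x1f = 11, so bit 11 of mchash[1] is
 * set; msk_rxfilter() below then loads the four GM_MC_ADDR_H*
 * registers from the two 32-bit mchash words, 16 bits at a time.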
*/ mchash[crc >> 5] |= 1 << (crc & 0x1f); return (1); } static void msk_rxfilter(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; uint32_t mchash[2]; uint16_t mode; sc = sc_if->msk_softc; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; bzero(mchash, sizeof(mchash)); mode = GMAC_READ_2(sc, sc_if->msk_port, GM_RX_CTRL); if ((ifp->if_flags & IFF_PROMISC) != 0) mode &= ~(GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA); else if ((ifp->if_flags & IFF_ALLMULTI) != 0) { mode |= GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA; mchash[0] = 0xffff; mchash[1] = 0xffff; } else { mode |= GM_RXCR_UCF_ENA; if_foreach_llmaddr(ifp, msk_hash_maddr, mchash); if (mchash[0] != 0 || mchash[1] != 0) mode |= GM_RXCR_MCF_ENA; } GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H1, mchash[0] & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H2, (mchash[0] >> 16) & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H3, mchash[1] & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H4, (mchash[1] >> 16) & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_CTRL, mode); } static void msk_setvlan(struct msk_if_softc *sc_if, struct ifnet *ifp) { struct msk_softc *sc; sc = sc_if->msk_softc; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_VLAN_TAG_ON); } else { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), RX_VLAN_STRIP_OFF); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_VLAN_TAG_OFF); } } static int msk_rx_fill(struct msk_if_softc *sc_if, int jumbo) { uint16_t idx; int i; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { /* Wait until controller executes OP_TCPSTART command. */ for (i = 100; i > 0; i--) { DELAY(100); idx = CSR_READ_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_GET_IDX_REG)); if (idx != 0) break; } if (i == 0) { device_printf(sc_if->msk_if_dev, "prefetch unit stuck?\n"); return (ETIMEDOUT); } /* * Fill consumed LE with free buffer. This can be done * in Rx handler but we don't want to add special code * in fast handler. */ if (jumbo > 0) { if (msk_jumbo_newbuf(sc_if, 0) != 0) return (ENOBUFS); bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } else { if (msk_newbuf(sc_if, 0) != 0) return (ENOBUFS); bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } sc_if->msk_cdata.msk_rx_prod = 0; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); } return (0); } static int msk_init_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); sc_if->msk_cdata.msk_rx_cons = 0; sc_if->msk_cdata.msk_rx_prod = 0; sc_if->msk_cdata.msk_rx_putwm = MSK_PUT_WM; rd = &sc_if->msk_rdata; bzero(rd->msk_rx_ring, sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT); for (i = prod = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; MSK_INC(prod, MSK_RX_RING_CNT); } nbuf = MSK_RX_BUF_CNT; prod = 0; /* Have controller know how to compute Rx checksum. 
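 * A note on the OP_TCPSTART list element built just below: both Rx
 * checksum start positions are packed into one 32-bit word,
 *	msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN);
 * i.e. checksumming begins right after the 14-byte Ethernet header in
 * both positions.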
*/ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); nbuf--; } for (i = 0; i < nbuf; i++) { if (msk_newbuf(sc_if, prod) != 0) return (ENOBUFS); MSK_RX_INC(prod, MSK_RX_RING_CNT); } bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Update prefetch unit. */ sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), (sc_if->msk_cdata.msk_rx_prod + MSK_RX_RING_CNT - 1) % MSK_RX_RING_CNT); if (msk_rx_fill(sc_if, 0) != 0) return (ENOBUFS); return (0); } static int msk_init_jumbo_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); sc_if->msk_cdata.msk_rx_cons = 0; sc_if->msk_cdata.msk_rx_prod = 0; sc_if->msk_cdata.msk_rx_putwm = MSK_PUT_WM; rd = &sc_if->msk_rdata; bzero(rd->msk_jumbo_rx_ring, sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT); for (i = prod = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); } nbuf = MSK_RX_BUF_CNT; prod = 0; /* Have controller know how to compute Rx checksum. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); nbuf--; } for (i = 0; i < nbuf; i++) { if (msk_jumbo_newbuf(sc_if, prod) != 0) return (ENOBUFS); MSK_RX_INC(prod, MSK_JUMBO_RX_RING_CNT); } bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Update prefetch unit. 
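 * The put index handed to the prefetch unit below is
 *	(prod + MSK_JUMBO_RX_RING_CNT - 1) % MSK_JUMBO_RX_RING_CNT,
 * i.e. the descriptor just before the producer index, so the hardware
 * only ever chases list elements that have been fully initialized (a
 * hedged reading; msk_init_rx_ring uses the same idiom).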
*/ sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), (sc_if->msk_cdata.msk_rx_prod + MSK_JUMBO_RX_RING_CNT - 1) % MSK_JUMBO_RX_RING_CNT); if (msk_rx_fill(sc_if, 1) != 0) return (ENOBUFS); return (0); } static void msk_init_tx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_txdesc *txd; int i; sc_if->msk_cdata.msk_tso_mtu = 0; sc_if->msk_cdata.msk_last_csum = 0; sc_if->msk_cdata.msk_tx_prod = 0; sc_if->msk_cdata.msk_tx_cons = 0; sc_if->msk_cdata.msk_tx_cnt = 0; sc_if->msk_cdata.msk_tx_high_addr = 0; rd = &sc_if->msk_rdata; bzero(rd->msk_tx_ring, sizeof(struct msk_tx_desc) * MSK_TX_RING_CNT); for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; txd->tx_m = NULL; txd->tx_le = &rd->msk_tx_ring[i]; } bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static __inline void msk_discard_rxbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; rx_le = rxd->rx_le; rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; rx_le->msk_control = htole32(m->m_len | OP_PACKET | HW_OWNER); } static __inline void msk_discard_jumbo_rxbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; rx_le = rxd->rx_le; rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; rx_le->msk_control = htole32(m->m_len | OP_PACKET | HW_OWNER); } static int msk_newbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) m_adj(m, ETHER_ALIGN); #ifndef __NO_STRICT_ALIGNMENT else m_adj(m, MSK_RX_BUF_ALIGN); #endif if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_rx_tag, sc_if->msk_cdata.msk_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; #ifdef MSK_64BIT_DMA rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_RX_RING_CNT); rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; #endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); rxd->rx_m = NULL; } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_rx_sparemap; sc_if->msk_cdata.msk_rx_sparemap = map; bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_LO(segs[0].ds_addr)); rx_le->msk_control = htole32(segs[0].ds_len | OP_PACKET | HW_OWNER); return (0); } static int msk_jumbo_newbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t 
segs[1]; bus_dmamap_t map; int nsegs; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) m_adj(m, ETHER_ALIGN); #ifndef __NO_STRICT_ALIGNMENT else m_adj(m, MSK_RX_BUF_ALIGN); #endif if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_jumbo_rx_tag, sc_if->msk_cdata.msk_jumbo_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; #ifdef MSK_64BIT_DMA rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; #endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap); rxd->rx_m = NULL; } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_jumbo_rx_sparemap; sc_if->msk_cdata.msk_jumbo_rx_sparemap = map; bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_LO(segs[0].ds_addr)); rx_le->msk_control = htole32(segs[0].ds_len | OP_PACKET | HW_OWNER); return (0); } /* * Set media options. */ static int msk_mediachange(struct ifnet *ifp) { struct msk_if_softc *sc_if; struct mii_data *mii; int error; sc_if = ifp->if_softc; MSK_IF_LOCK(sc_if); mii = device_get_softc(sc_if->msk_miibus); error = mii_mediachg(mii); MSK_IF_UNLOCK(sc_if); return (error); } /* * Report current media status. */ static void msk_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct msk_if_softc *sc_if; struct mii_data *mii; sc_if = ifp->if_softc; MSK_IF_LOCK(sc_if); if ((ifp->if_flags & IFF_UP) == 0) { MSK_IF_UNLOCK(sc_if); return; } mii = device_get_softc(sc_if->msk_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; MSK_IF_UNLOCK(sc_if); } static int msk_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct msk_if_softc *sc_if; struct ifreq *ifr; struct mii_data *mii; int error, mask, reinit; sc_if = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; switch(command) { case SIOCSIFMTU: MSK_IF_LOCK(sc_if); if (ifr->ifr_mtu > MSK_JUMBO_MTU || ifr->ifr_mtu < ETHERMIN) error = EINVAL; else if (ifp->if_mtu != ifr->ifr_mtu) { if (ifr->ifr_mtu > ETHERMTU) { if ((sc_if->msk_flags & MSK_FLAG_JUMBO) == 0) { error = EINVAL; MSK_IF_UNLOCK(sc_if); break; } if ((sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { ifp->if_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); VLAN_CAPABILITIES(ifp); } } ifp->if_mtu = ifr->ifr_mtu; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); } } MSK_IF_UNLOCK(sc_if); break; case SIOCSIFFLAGS: MSK_IF_LOCK(sc_if); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc_if->msk_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) msk_rxfilter(sc_if); else if ((sc_if->msk_flags & MSK_FLAG_DETACH) == 0) msk_init_locked(sc_if); } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) msk_stop(sc_if); sc_if->msk_if_flags = ifp->if_flags; MSK_IF_UNLOCK(sc_if); break; case SIOCADDMULTI: case SIOCDELMULTI: MSK_IF_LOCK(sc_if); if 
((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) msk_rxfilter(sc_if); MSK_IF_UNLOCK(sc_if); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc_if->msk_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: reinit = 0; MSK_IF_LOCK(sc_if); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if ((mask & IFCAP_TXCSUM) != 0 && (IFCAP_TXCSUM & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((IFCAP_TXCSUM & ifp->if_capenable) != 0) ifp->if_hwassist |= MSK_CSUM_FEATURES; else ifp->if_hwassist &= ~MSK_CSUM_FEATURES; } if ((mask & IFCAP_RXCSUM) != 0 && (IFCAP_RXCSUM & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) reinit = 1; } if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (IFCAP_VLAN_HWCSUM & ifp->if_capabilities) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; if ((mask & IFCAP_TSO4) != 0 && (IFCAP_TSO4 & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((IFCAP_TSO4 & ifp->if_capenable) != 0) ifp->if_hwassist |= CSUM_TSO; else ifp->if_hwassist &= ~CSUM_TSO; } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (IFCAP_VLAN_HWTSO & ifp->if_capabilities) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (IFCAP_VLAN_HWTAGGING & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if ((IFCAP_VLAN_HWTAGGING & ifp->if_capenable) == 0) ifp->if_capenable &= ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); msk_setvlan(sc_if, ifp); } if (ifp->if_mtu > ETHERMTU && (sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { ifp->if_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); } VLAN_CAPABILITIES(ifp); if (reinit > 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); } MSK_IF_UNLOCK(sc_if); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int mskc_probe(device_t dev) { const struct msk_product *mp; uint16_t vendor, devid; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); mp = msk_products; for (i = 0; i < nitems(msk_products); i++, mp++) { if (vendor == mp->msk_vendorid && devid == mp->msk_deviceid) { device_set_desc(dev, mp->msk_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int mskc_setup_rambuffer(struct msk_softc *sc) { int next; int i; /* Get adapter SRAM size. */ sc->msk_ramsize = CSR_READ_1(sc, B2_E_0) * 4; if (bootverbose) device_printf(sc->msk_dev, "RAM buffer size : %dKB\n", sc->msk_ramsize); if (sc->msk_ramsize == 0) return (0); sc->msk_pflags |= MSK_FLAG_RAMBUF; /* * Give receiver 2/3 of memory and round down to the multiple * of 1024. Tx/Rx RAM buffer size of Yukon II should be multiple * of 1024. 
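 * Worked example (hypothetical 48KB SRAM): with msk_ramsize = 48,
 *	msk_rxqsize = rounddown((48 * 1024 * 2) / 3, 1024) = 32768
 *	msk_txqsize = 48 * 1024 - 32768 = 16384
 * so the Rx queue gets 32KB and the Tx queue the remaining 16KB.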
*/ sc->msk_rxqsize = rounddown((sc->msk_ramsize * 1024 * 2) / 3, 1024); sc->msk_txqsize = (sc->msk_ramsize * 1024) - sc->msk_rxqsize; for (i = 0, next = 0; i < sc->msk_num_port; i++) { sc->msk_rxqstart[i] = next; sc->msk_rxqend[i] = next + sc->msk_rxqsize - 1; next = sc->msk_rxqend[i] + 1; sc->msk_txqstart[i] = next; sc->msk_txqend[i] = next + sc->msk_txqsize - 1; next = sc->msk_txqend[i] + 1; if (bootverbose) { device_printf(sc->msk_dev, "Port %d : Rx Queue %dKB(0x%08x:0x%08x)\n", i, sc->msk_rxqsize / 1024, sc->msk_rxqstart[i], sc->msk_rxqend[i]); device_printf(sc->msk_dev, "Port %d : Tx Queue %dKB(0x%08x:0x%08x)\n", i, sc->msk_txqsize / 1024, sc->msk_txqstart[i], sc->msk_txqend[i]); } } return (0); } static void msk_phy_power(struct msk_softc *sc, int mode) { uint32_t our, val; int i; switch (mode) { case MSK_PHY_POWERUP: /* Switch power to VCC (WA for VAUX problem). */ CSR_WRITE_1(sc, B0_POWER_CTRL, PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_OFF | PC_VCC_ON); /* Disable Core Clock Division, set Clock Select to 0. */ CSR_WRITE_4(sc, B2_Y2_CLK_CTRL, Y2_CLK_DIV_DIS); val = 0; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Enable bits are inverted. */ val = Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS; } /* * Enable PCI & Core Clock, enable clock gating for both Links. */ CSR_WRITE_1(sc, B2_Y2_CLK_GATE, val); our = CSR_PCI_READ_4(sc, PCI_OUR_REG_1); our &= ~(PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD); if (sc->msk_hw_id == CHIP_ID_YUKON_XL) { if (sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Deassert Low Power for 1st PHY. */ our |= PCI_Y2_PHY1_COMA; if (sc->msk_num_port > 1) our |= PCI_Y2_PHY2_COMA; } } if (sc->msk_hw_id == CHIP_ID_YUKON_EC_U || sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id >= CHIP_ID_YUKON_FE_P) { val = CSR_PCI_READ_4(sc, PCI_OUR_REG_4); val &= (PCI_FORCE_ASPM_REQUEST | PCI_ASPM_GPHY_LINK_DOWN | PCI_ASPM_INT_FIFO_EMPTY | PCI_ASPM_CLKRUN_REQUEST); /* Set all bits to 0 except bits 15..12. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_4, val); val = CSR_PCI_READ_4(sc, PCI_OUR_REG_5); val &= PCI_CTL_TIM_VMAIN_AV_MSK; CSR_PCI_WRITE_4(sc, PCI_OUR_REG_5, val); CSR_PCI_WRITE_4(sc, PCI_CFG_REG_1, 0); CSR_WRITE_2(sc, B0_CTST, Y2_HW_WOL_ON); /* * Disable status race, workaround for * Yukon EC Ultra & Yukon EX. */ val = CSR_READ_4(sc, B2_GP_IO); val |= GLB_GPIO_STAT_RACE_DIS; CSR_WRITE_4(sc, B2_GP_IO, val); CSR_READ_4(sc, B2_GP_IO); } /* Release PHY from PowerDown/COMA mode. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_1, our); for (i = 0; i < sc->msk_num_port; i++) { CSR_WRITE_2(sc, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_SET); CSR_WRITE_2(sc, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_CLR); } break; case MSK_PHY_POWERDOWN: val = CSR_PCI_READ_4(sc, PCI_OUR_REG_1); val |= PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { val &= ~PCI_Y2_PHY1_COMA; if (sc->msk_num_port > 1) val &= ~PCI_Y2_PHY2_COMA; } CSR_PCI_WRITE_4(sc, PCI_OUR_REG_1, val); val = Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Enable bits are inverted. */ val = 0; } /* * Disable PCI & Core Clock, disable clock gating for * both Links. 
*/ CSR_WRITE_1(sc, B2_Y2_CLK_GATE, val); CSR_WRITE_1(sc, B0_POWER_CTRL, PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_ON | PC_VCC_OFF); break; default: break; } } static void mskc_reset(struct msk_softc *sc) { bus_addr_t addr; uint16_t status; uint32_t val; int i, initram; /* Disable ASF. */ if (sc->msk_hw_id >= CHIP_ID_YUKON_XL && sc->msk_hw_id <= CHIP_ID_YUKON_SUPR) { if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) { CSR_WRITE_4(sc, B28_Y2_CPU_WDOG, 0); status = CSR_READ_2(sc, B28_Y2_ASF_HCU_CCSR); /* Clear AHB bridge & microcontroller reset. */ status &= ~(Y2_ASF_HCU_CCSR_AHB_RST | Y2_ASF_HCU_CCSR_CPU_RST_MODE); /* Clear ASF microcontroller state. */ status &= ~Y2_ASF_HCU_CCSR_UC_STATE_MSK; status &= ~Y2_ASF_HCU_CCSR_CPU_CLK_DIVIDE_MSK; CSR_WRITE_2(sc, B28_Y2_ASF_HCU_CCSR, status); CSR_WRITE_4(sc, B28_Y2_CPU_WDOG, 0); } else CSR_WRITE_1(sc, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); CSR_WRITE_2(sc, B0_CTST, Y2_ASF_DISABLE); /* * Since we disabled ASF, S/W reset is required for * Power Management. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); CSR_WRITE_2(sc, B0_CTST, CS_RST_CLR); } /* Clear all error bits in the PCI status register. */ status = pci_read_config(sc->msk_dev, PCIR_STATUS, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); pci_write_config(sc->msk_dev, PCIR_STATUS, status | PCIM_STATUS_PERR | PCIM_STATUS_SERR | PCIM_STATUS_RMABORT | PCIM_STATUS_RTABORT | PCIM_STATUS_MDPERR, 2); CSR_WRITE_2(sc, B0_CTST, CS_MRST_CLR); switch (sc->msk_bustype) { case MSK_PEX_BUS: /* Clear all PEX errors. */ CSR_PCI_WRITE_4(sc, PEX_UNC_ERR_STAT, 0xffffffff); val = CSR_PCI_READ_4(sc, PEX_UNC_ERR_STAT); if ((val & PEX_RX_OV) != 0) { sc->msk_intrmask &= ~Y2_IS_HW_ERR; sc->msk_intrhwemask &= ~Y2_IS_PCI_EXP; } break; case MSK_PCI_BUS: case MSK_PCIX_BUS: /* Set Cache Line Size to 2(8bytes) if configured to 0. */ val = pci_read_config(sc->msk_dev, PCIR_CACHELNSZ, 1); if (val == 0) pci_write_config(sc->msk_dev, PCIR_CACHELNSZ, 2, 1); if (sc->msk_bustype == MSK_PCIX_BUS) { /* Set Cache Line Size opt. */ val = pci_read_config(sc->msk_dev, PCI_OUR_REG_1, 4); val |= PCI_CLS_OPT; pci_write_config(sc->msk_dev, PCI_OUR_REG_1, val, 4); } break; } /* Set PHY power state. */ msk_phy_power(sc, MSK_PHY_POWERUP); /* Reset GPHY/GMAC Control */ for (i = 0; i < sc->msk_num_port; i++) { /* GPHY Control reset. */ CSR_WRITE_1(sc, MR_ADDR(i, GPHY_CTRL), GPC_RST_SET); CSR_WRITE_1(sc, MR_ADDR(i, GPHY_CTRL), GPC_RST_CLR); /* GMAC Control reset. */ CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_RST_SET); CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_F_LOOPB_OFF); if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_BYP_MACSECRX_ON | GMC_BYP_MACSECTX_ON | GMC_BYP_RETR_ON); } if (sc->msk_hw_id == CHIP_ID_YUKON_SUPR && sc->msk_hw_rev > CHIP_REV_YU_SU_B0) CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, PCI_CLK_MACSEC_DIS); if (sc->msk_hw_id == CHIP_ID_YUKON_OPT && sc->msk_hw_rev == 0) { /* Disable PCIe PHY powerdown(reg 0x80, bit7). */ CSR_WRITE_4(sc, Y2_PEX_PHY_DATA, (0x0080 << 16) | 0x0080); } CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); /* LED On. */ CSR_WRITE_2(sc, B0_CTST, Y2_LED_STAT_ON); /* Clear TWSI IRQ. */ CSR_WRITE_4(sc, B2_I2C_IRQ, I2C_CLR_IRQ); /* Turn off hardware timer. */ CSR_WRITE_1(sc, B2_TI_CTRL, TIM_STOP); CSR_WRITE_1(sc, B2_TI_CTRL, TIM_CLR_IRQ); /* Turn off descriptor polling. */ CSR_WRITE_1(sc, B28_DPT_CTRL, DPT_STOP); /* Turn off time stamps. 
*/
	CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_STOP);
	CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ);

	initram = 0;
	if (sc->msk_hw_id == CHIP_ID_YUKON_XL ||
	    sc->msk_hw_id == CHIP_ID_YUKON_EC ||
	    sc->msk_hw_id == CHIP_ID_YUKON_FE)
		initram++;
	/* Configure timeout values. */
	for (i = 0; initram > 0 && i < sc->msk_num_port; i++) {
		CSR_WRITE_2(sc, SELECT_RAM_BUFFER(i, B3_RI_CTRL), RI_RST_SET);
		CSR_WRITE_2(sc, SELECT_RAM_BUFFER(i, B3_RI_CTRL), RI_RST_CLR);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_R1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_R1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_R2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_R2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS2),
		    MSK_RI_TO_53);
	}

	/* Disable all interrupts. */
	CSR_WRITE_4(sc, B0_HWE_IMSK, 0);
	CSR_READ_4(sc, B0_HWE_IMSK);
	CSR_WRITE_4(sc, B0_IMSK, 0);
	CSR_READ_4(sc, B0_IMSK);

	/*
	 * On dual-port PCI-X cards, there is a problem where status
	 * can be received out of order due to split transactions.
	 */
	if (sc->msk_pcixcap != 0 && sc->msk_num_port > 1) {
		uint16_t pcix_cmd;

		pcix_cmd = pci_read_config(sc->msk_dev,
		    sc->msk_pcixcap + PCIXR_COMMAND, 2);
		/* Clear Max Outstanding Split Transactions. */
		pcix_cmd &= ~PCIXM_COMMAND_MAX_SPLITS;
		CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON);
		pci_write_config(sc->msk_dev,
		    sc->msk_pcixcap + PCIXR_COMMAND, pcix_cmd, 2);
		CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
	}
	if (sc->msk_expcap != 0) {
		/* Change Max. Read Request Size to 2048 bytes. */
		if (pci_get_max_read_req(sc->msk_dev) == 512)
			pci_set_max_read_req(sc->msk_dev, 2048);
	}

	/* Clear status list. */
	bzero(sc->msk_stat_ring,
	    sizeof(struct msk_stat_desc) * sc->msk_stat_count);
	sc->msk_stat_cons = 0;
	bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_RST_SET);
	CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_RST_CLR);
	/* Set the status list base address. */
	addr = sc->msk_stat_ring_paddr;
	CSR_WRITE_4(sc, STAT_LIST_ADDR_LO, MSK_ADDR_LO(addr));
	CSR_WRITE_4(sc, STAT_LIST_ADDR_HI, MSK_ADDR_HI(addr));
	/* Set the status list last index. */
	CSR_WRITE_2(sc, STAT_LAST_IDX, sc->msk_stat_count - 1);
	if (sc->msk_hw_id == CHIP_ID_YUKON_EC &&
	    sc->msk_hw_rev == CHIP_REV_YU_EC_A1) {
		/* WA for dev. #4.3 */
		CSR_WRITE_2(sc, STAT_TX_IDX_TH, ST_TXTH_IDX_MASK);
		/* WA for dev. #4.18 */
		CSR_WRITE_1(sc, STAT_FIFO_WM, 0x21);
		CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x07);
	} else {
		CSR_WRITE_2(sc, STAT_TX_IDX_TH, 0x0a);
		CSR_WRITE_1(sc, STAT_FIFO_WM, 0x10);
		if (sc->msk_hw_id == CHIP_ID_YUKON_XL &&
		    sc->msk_hw_rev == CHIP_REV_YU_XL_A0)
			CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x04);
		else
			CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x10);
		CSR_WRITE_4(sc, STAT_ISR_TIMER_INI, 0x0190);
	}
	/*
	 * Use default value for STAT_ISR_TIMER_INI, STAT_LEV_TIMER_INI.
	 */
	CSR_WRITE_4(sc, STAT_TX_TIMER_INI, MSK_USECS(sc, 1000));
	/* Enable status unit.
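 *
 * The timer initial values are programmed in core-clock ticks, so the
 * microsecond argument is scaled by the chip clock; a sketch, assuming
 * MSK_USECS() simply multiplies by msk_clock in MHz:
 */
#if 0	/* not compiled; worked example only */
uint32_t ticks;

/* e.g. a 125 MHz Yukon EC: 1000 us -> 125 * 1000 = 125000 ticks */
ticks = sc->msk_clock * 1000;
#endif
/*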
*/ CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_OP_ON); CSR_WRITE_1(sc, STAT_TX_TIMER_CTRL, TIM_START); CSR_WRITE_1(sc, STAT_LEV_TIMER_CTRL, TIM_START); CSR_WRITE_1(sc, STAT_ISR_TIMER_CTRL, TIM_START); } static int msk_probe(device_t dev) { struct msk_softc *sc; char desc[100]; sc = device_get_softc(device_get_parent(dev)); /* * Not much to do here. We always know there will be * at least one GMAC present, and if there are two, * mskc_attach() will create a second device instance * for us. */ snprintf(desc, sizeof(desc), "Marvell Technology Group Ltd. %s Id 0x%02x Rev 0x%02x", model_name[sc->msk_hw_id - CHIP_ID_YUKON_XL], sc->msk_hw_id, sc->msk_hw_rev); device_set_desc_copy(dev, desc); return (BUS_PROBE_DEFAULT); } static int msk_attach(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; struct ifnet *ifp; struct msk_mii_data *mmd; int i, port, error; uint8_t eaddr[6]; if (dev == NULL) return (EINVAL); error = 0; sc_if = device_get_softc(dev); sc = device_get_softc(device_get_parent(dev)); mmd = device_get_ivars(dev); port = mmd->port; sc_if->msk_if_dev = dev; sc_if->msk_port = port; sc_if->msk_softc = sc; sc_if->msk_flags = sc->msk_pflags; sc->msk_if[port] = sc_if; /* Setup Tx/Rx queue register offsets. */ if (port == MSK_PORT_A) { sc_if->msk_txq = Q_XA1; sc_if->msk_txsq = Q_XS1; sc_if->msk_rxq = Q_R1; } else { sc_if->msk_txq = Q_XA2; sc_if->msk_txsq = Q_XS2; sc_if->msk_rxq = Q_R2; } callout_init_mtx(&sc_if->msk_tick_ch, &sc_if->msk_softc->msk_mtx, 0); msk_sysctl_node(sc_if); if ((error = msk_txrx_dma_alloc(sc_if)) != 0) goto fail; msk_rx_dma_jalloc(sc_if); ifp = sc_if->msk_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc_if->msk_if_dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc_if; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO4; /* * Enable Rx checksum offloading if controller supports * new descriptor formant and controller is not Yukon XL. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && sc->msk_hw_id != CHIP_ID_YUKON_XL) ifp->if_capabilities |= IFCAP_RXCSUM; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0 && (sc_if->msk_flags & MSK_FLAG_NORX_CSUM) == 0) ifp->if_capabilities |= IFCAP_RXCSUM; ifp->if_hwassist = MSK_CSUM_FEATURES | CSUM_TSO; ifp->if_capenable = ifp->if_capabilities; ifp->if_ioctl = msk_ioctl; ifp->if_start = msk_start; ifp->if_init = msk_init; IFQ_SET_MAXLEN(&ifp->if_snd, MSK_TX_RING_CNT - 1); ifp->if_snd.ifq_drv_maxlen = MSK_TX_RING_CNT - 1; IFQ_SET_READY(&ifp->if_snd); /* * Get station address for this interface. Note that * dual port cards actually come with three station * addresses: one for each port, plus an extra. The * extra one is used by the SysKonnect driver software * as a 'virtual' station address for when both ports * are operating in failover mode. Currently we don't * use this extra address. */ MSK_IF_LOCK(sc_if); for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, B2_MAC_1 + (port * 8) + i); /* * Call MI attach routine. Can't hold locks when calling into ether_*. */ MSK_IF_UNLOCK(sc_if); ether_ifattach(ifp, eaddr); MSK_IF_LOCK(sc_if); /* VLAN capability setup */ ifp->if_capabilities |= IFCAP_VLAN_MTU; if ((sc_if->msk_flags & MSK_FLAG_NOHWVLAN) == 0) { /* * Due to Tx checksum offload hardware bugs, msk(4) manually * computes checksum for short frames. For VLAN tagged frames * this workaround does not work so disable checksum offload * for VLAN interface. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO; /* * Enable Rx checksum offloading for VLAN tagged frames * if controller support new descriptor format. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0 && (sc_if->msk_flags & MSK_FLAG_NORX_CSUM) == 0) ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; } ifp->if_capenable = ifp->if_capabilities; /* * Disable RX checksum offloading on controllers that don't use * new descriptor format but give chance to enable it. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) ifp->if_capenable &= ~IFCAP_RXCSUM; /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* * Do miibus setup. */ MSK_IF_UNLOCK(sc_if); error = mii_attach(dev, &sc_if->msk_miibus, ifp, msk_mediachange, msk_mediastatus, BMSR_DEFCAPMASK, PHY_ADDR_MARV, MII_OFFSET_ANY, mmd->mii_flags); if (error != 0) { device_printf(sc_if->msk_if_dev, "attaching PHYs failed\n"); ether_ifdetach(ifp); error = ENXIO; goto fail; } fail: if (error != 0) { /* Access should be ok even though lock has been dropped */ sc->msk_if[port] = NULL; msk_detach(dev); } return (error); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int mskc_attach(device_t dev) { struct msk_softc *sc; struct msk_mii_data *mmd; int error, msic, msir, reg; sc = device_get_softc(dev); sc->msk_dev = dev; mtx_init(&sc->msk_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); /* * Map control/status registers. */ pci_enable_busmaster(dev); /* Allocate I/O resource */ #ifdef MSK_USEIOSPACE sc->msk_res_spec = msk_res_spec_io; #else sc->msk_res_spec = msk_res_spec_mem; #endif sc->msk_irq_spec = msk_irq_spec_legacy; error = bus_alloc_resources(dev, sc->msk_res_spec, sc->msk_res); if (error) { if (sc->msk_res_spec == msk_res_spec_mem) sc->msk_res_spec = msk_res_spec_io; else sc->msk_res_spec = msk_res_spec_mem; error = bus_alloc_resources(dev, sc->msk_res_spec, sc->msk_res); if (error) { device_printf(dev, "couldn't allocate %s resources\n", sc->msk_res_spec == msk_res_spec_mem ? "memory" : "I/O"); mtx_destroy(&sc->msk_mtx); return (ENXIO); } } /* Enable all clocks before accessing any registers. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, 0); CSR_WRITE_2(sc, B0_CTST, CS_RST_CLR); sc->msk_hw_id = CSR_READ_1(sc, B2_CHIP_ID); sc->msk_hw_rev = (CSR_READ_1(sc, B2_MAC_CFG) >> 4) & 0x0f; /* Bail out if chip is not recognized. 
*/ if (sc->msk_hw_id < CHIP_ID_YUKON_XL || sc->msk_hw_id > CHIP_ID_YUKON_OPT || sc->msk_hw_id == CHIP_ID_YUKON_UNKNOWN) { device_printf(dev, "unknown device: id=0x%02x, rev=0x%02x\n", sc->msk_hw_id, sc->msk_hw_rev); mtx_destroy(&sc->msk_mtx); return (ENXIO); } SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "process_limit", CTLTYPE_INT | CTLFLAG_RW, &sc->msk_process_limit, 0, sysctl_hw_msk_proc_limit, "I", "max number of Rx events to process"); sc->msk_process_limit = MSK_PROC_DEFAULT; error = resource_int_value(device_get_name(dev), device_get_unit(dev), "process_limit", &sc->msk_process_limit); if (error == 0) { if (sc->msk_process_limit < MSK_PROC_MIN || sc->msk_process_limit > MSK_PROC_MAX) { device_printf(dev, "process_limit value out of range; " "using default: %d\n", MSK_PROC_DEFAULT); sc->msk_process_limit = MSK_PROC_DEFAULT; } } sc->msk_int_holdoff = MSK_INT_HOLDOFF_DEFAULT; SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "int_holdoff", CTLFLAG_RW, &sc->msk_int_holdoff, 0, "Maximum number of time to delay interrupts"); resource_int_value(device_get_name(dev), device_get_unit(dev), "int_holdoff", &sc->msk_int_holdoff); sc->msk_pmd = CSR_READ_1(sc, B2_PMD_TYP); /* Check number of MACs. */ sc->msk_num_port = 1; if ((CSR_READ_1(sc, B2_Y2_HW_RES) & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK) { if (!(CSR_READ_1(sc, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC)) sc->msk_num_port++; } /* Check bus type. */ if (pci_find_cap(sc->msk_dev, PCIY_EXPRESS, ®) == 0) { sc->msk_bustype = MSK_PEX_BUS; sc->msk_expcap = reg; } else if (pci_find_cap(sc->msk_dev, PCIY_PCIX, ®) == 0) { sc->msk_bustype = MSK_PCIX_BUS; sc->msk_pcixcap = reg; } else sc->msk_bustype = MSK_PCI_BUS; switch (sc->msk_hw_id) { case CHIP_ID_YUKON_EC: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_EC_U: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_JUMBO_NOCSUM; break; case CHIP_ID_YUKON_EX: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; /* * Yukon Extreme seems to have silicon bug for * automatic Tx checksum calculation capability. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_B0) sc->msk_pflags &= ~MSK_FLAG_AUTOTX_CSUM; /* * Yukon Extreme A0 could not use store-and-forward * for jumbo frames, so disable Tx checksum * offloading for jumbo frames. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_A0) sc->msk_pflags |= MSK_FLAG_JUMBO_NOCSUM; break; case CHIP_ID_YUKON_FE: sc->msk_clock = 100; /* 100 MHz */ sc->msk_pflags |= MSK_FLAG_FASTETHER; break; case CHIP_ID_YUKON_FE_P: sc->msk_clock = 50; /* 50 MHz */ sc->msk_pflags |= MSK_FLAG_FASTETHER | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; if (sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { /* * XXX * FE+ A0 has status LE writeback bug so msk(4) * does not rely on status word of received frame * in msk_rxeof() which in turn disables all * hardware assistance bits reported by the status * word as well as validity of the received frame. * Just pass received frames to upper stack with * minimal test and let upper stack handle them. 
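 *
 * The "minimal test" mentioned here is the one msk_rxeof() applies
 * when MSK_FLAG_NORXCHK is set; condensed from that routine:
 */
#if 0	/* not compiled; condensed from msk_rxeof() */
if (len > MSK_MAX_FRAMELEN || len < ETHER_HDR_LEN) {
	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
	msk_discard_rxbuf(sc_if, cons);		/* recycle the buffer */
}
#endif
/*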
*/ sc->msk_pflags |= MSK_FLAG_NOHWVLAN | MSK_FLAG_NORXCHK | MSK_FLAG_NORX_CSUM; } break; case CHIP_ID_YUKON_XL: sc->msk_clock = 156; /* 156 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_SUPR: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; break; case CHIP_ID_YUKON_UL_2: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_OPT: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2; break; default: sc->msk_clock = 156; /* 156 MHz */ break; } /* Allocate IRQ resources. */ msic = pci_msi_count(dev); if (bootverbose) device_printf(dev, "MSI count : %d\n", msic); if (legacy_intr != 0) msi_disable = 1; if (msi_disable == 0 && msic > 0) { msir = 1; if (pci_alloc_msi(dev, &msir) == 0) { if (msir == 1) { sc->msk_pflags |= MSK_FLAG_MSI; sc->msk_irq_spec = msk_irq_spec_msi; } else pci_release_msi(dev); } } error = bus_alloc_resources(dev, sc->msk_irq_spec, sc->msk_irq); if (error) { device_printf(dev, "couldn't allocate IRQ resources\n"); goto fail; } if ((error = msk_status_dma_alloc(sc)) != 0) goto fail; /* Set base interrupt mask. */ sc->msk_intrmask = Y2_IS_HW_ERR | Y2_IS_STAT_BMU; sc->msk_intrhwemask = Y2_IS_TIST_OV | Y2_IS_MST_ERR | Y2_IS_IRQ_STAT | Y2_IS_PCI_EXP | Y2_IS_PCI_NEXP; /* Reset the adapter. */ mskc_reset(sc); if ((error = mskc_setup_rambuffer(sc)) != 0) goto fail; sc->msk_devs[MSK_PORT_A] = device_add_child(dev, "msk", -1); if (sc->msk_devs[MSK_PORT_A] == NULL) { device_printf(dev, "failed to add child for PORT_A\n"); error = ENXIO; goto fail; } mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | M_ZERO); mmd->port = MSK_PORT_A; mmd->pmd = sc->msk_pmd; mmd->mii_flags |= MIIF_DOPAUSE; if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') mmd->mii_flags |= MIIF_HAVEFIBER; if (sc->msk_pmd == 'P') mmd->mii_flags |= MIIF_HAVEFIBER | MIIF_MACPRIV0; device_set_ivars(sc->msk_devs[MSK_PORT_A], mmd); if (sc->msk_num_port > 1) { sc->msk_devs[MSK_PORT_B] = device_add_child(dev, "msk", -1); if (sc->msk_devs[MSK_PORT_B] == NULL) { device_printf(dev, "failed to add child for PORT_B\n"); error = ENXIO; goto fail; } mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | M_ZERO); mmd->port = MSK_PORT_B; mmd->pmd = sc->msk_pmd; if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') mmd->mii_flags |= MIIF_HAVEFIBER; if (sc->msk_pmd == 'P') mmd->mii_flags |= MIIF_HAVEFIBER | MIIF_MACPRIV0; device_set_ivars(sc->msk_devs[MSK_PORT_B], mmd); } error = bus_generic_attach(dev); if (error) { device_printf(dev, "failed to attach port(s)\n"); goto fail; } /* Hook interrupt last to avoid having to lock softc. */ error = bus_setup_intr(dev, sc->msk_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, msk_intr, sc, &sc->msk_intrhand); if (error != 0) { device_printf(dev, "couldn't set up interrupt handler\n"); goto fail; } fail: if (error != 0) mskc_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. 
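 *
 * The pattern used for that: test every resource before releasing it
 * and clear the pointer afterwards, so a partially completed attach
 * unwinds safely.  One instance from mskc_detach() below:
 */
#if 0	/* not compiled; excerpted for illustration */
if (sc->msk_intrhand) {
	bus_teardown_intr(dev, sc->msk_irq[0], sc->msk_intrhand);
	sc->msk_intrhand = NULL;	/* a second pass becomes a no-op */
}
#endif
/*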
*/ static int msk_detach(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; struct ifnet *ifp; sc_if = device_get_softc(dev); KASSERT(mtx_initialized(&sc_if->msk_softc->msk_mtx), ("msk mutex not initialized in msk_detach")); MSK_IF_LOCK(sc_if); ifp = sc_if->msk_ifp; if (device_is_attached(dev)) { /* XXX */ sc_if->msk_flags |= MSK_FLAG_DETACH; msk_stop(sc_if); /* Can't hold locks while calling detach. */ MSK_IF_UNLOCK(sc_if); callout_drain(&sc_if->msk_tick_ch); if (ifp) ether_ifdetach(ifp); MSK_IF_LOCK(sc_if); } /* * We're generally called from mskc_detach() which is using * device_delete_child() to get to here. It's already trashed * miibus for us, so don't do it here or we'll panic. * * if (sc_if->msk_miibus != NULL) { * device_delete_child(dev, sc_if->msk_miibus); * sc_if->msk_miibus = NULL; * } */ msk_rx_dma_jfree(sc_if); msk_txrx_dma_free(sc_if); bus_generic_detach(dev); sc = sc_if->msk_softc; sc->msk_if[sc_if->msk_port] = NULL; MSK_IF_UNLOCK(sc_if); if (ifp) if_free(ifp); return (0); } static int mskc_detach(device_t dev) { struct msk_softc *sc; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->msk_mtx), ("msk mutex not initialized")); if (device_is_alive(dev)) { if (sc->msk_devs[MSK_PORT_A] != NULL) { free(device_get_ivars(sc->msk_devs[MSK_PORT_A]), M_DEVBUF); device_delete_child(dev, sc->msk_devs[MSK_PORT_A]); } if (sc->msk_devs[MSK_PORT_B] != NULL) { free(device_get_ivars(sc->msk_devs[MSK_PORT_B]), M_DEVBUF); device_delete_child(dev, sc->msk_devs[MSK_PORT_B]); } bus_generic_detach(dev); } /* Disable all interrupts. */ CSR_WRITE_4(sc, B0_IMSK, 0); CSR_READ_4(sc, B0_IMSK); CSR_WRITE_4(sc, B0_HWE_IMSK, 0); CSR_READ_4(sc, B0_HWE_IMSK); /* LED Off. */ CSR_WRITE_2(sc, B0_CTST, Y2_LED_STAT_OFF); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); msk_status_dma_free(sc); if (sc->msk_intrhand) { bus_teardown_intr(dev, sc->msk_irq[0], sc->msk_intrhand); sc->msk_intrhand = NULL; } bus_release_resources(dev, sc->msk_irq_spec, sc->msk_irq); if ((sc->msk_pflags & MSK_FLAG_MSI) != 0) pci_release_msi(dev); bus_release_resources(dev, sc->msk_res_spec, sc->msk_res); mtx_destroy(&sc->msk_mtx); return (0); } static bus_dma_tag_t mskc_get_dma_tag(device_t bus, device_t child __unused) { return (bus_get_dma_tag(bus)); } struct msk_dmamap_arg { bus_addr_t msk_busaddr; }; static void msk_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct msk_dmamap_arg *ctx; if (error != 0) return; ctx = arg; ctx->msk_busaddr = segs[0].ds_addr; } /* Create status DMA region. */ static int msk_status_dma_alloc(struct msk_softc *sc) { struct msk_dmamap_arg ctx; bus_size_t stat_sz; int count, error; /* * It seems controller requires number of status LE entries * is power of 2 and the maximum number of status LE entries * is 4096. For dual-port controllers, the number of status * LE entries should be large enough to hold both port's * status updates. 
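 *
 * A worked example of the sizing below, assuming stock 512-entry
 * Rx and Tx rings (the actual ring counts may differ):
 *
 *   count = 3 * 512 + 512  = 2048
 *   roundup2(2048, 1024)   = 2048   (already a 1 KB multiple)
 *   imin(4096, 2048)       = 2048 status LEs,
 *
 * i.e. 2048 * sizeof(struct msk_stat_desc) bytes of DMA memory.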
*/ count = 3 * MSK_RX_RING_CNT + MSK_TX_RING_CNT; count = imin(4096, roundup2(count, 1024)); sc->msk_stat_count = count; stat_sz = count * sizeof(struct msk_stat_desc); error = bus_dma_tag_create( bus_get_dma_tag(sc->msk_dev), /* parent */ MSK_STAT_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ stat_sz, /* maxsize */ 1, /* nsegments */ stat_sz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->msk_stat_tag); if (error != 0) { device_printf(sc->msk_dev, "failed to create status DMA tag\n"); return (error); } /* Allocate DMA'able memory and load the DMA map for status ring. */ error = bus_dmamem_alloc(sc->msk_stat_tag, (void **)&sc->msk_stat_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->msk_stat_map); if (error != 0) { device_printf(sc->msk_dev, "failed to allocate DMA'able memory for status ring\n"); return (error); } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc->msk_stat_tag, sc->msk_stat_map, sc->msk_stat_ring, stat_sz, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->msk_dev, "failed to load DMA'able memory for status ring\n"); return (error); } sc->msk_stat_ring_paddr = ctx.msk_busaddr; return (0); } static void msk_status_dma_free(struct msk_softc *sc) { /* Destroy status block. */ if (sc->msk_stat_tag) { if (sc->msk_stat_ring_paddr) { bus_dmamap_unload(sc->msk_stat_tag, sc->msk_stat_map); sc->msk_stat_ring_paddr = 0; } if (sc->msk_stat_ring) { bus_dmamem_free(sc->msk_stat_tag, sc->msk_stat_ring, sc->msk_stat_map); sc->msk_stat_ring = NULL; } bus_dma_tag_destroy(sc->msk_stat_tag); sc->msk_stat_tag = NULL; } } static int msk_txrx_dma_alloc(struct msk_if_softc *sc_if) { struct msk_dmamap_arg ctx; struct msk_txdesc *txd; struct msk_rxdesc *rxd; bus_size_t rxalign; int error, i; /* Create parent DMA tag. */ error = bus_dma_tag_create( bus_get_dma_tag(sc_if->msk_if_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_parent_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create parent DMA tag\n"); goto fail; } /* Create tag for Tx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_TX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_TX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_tx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx ring DMA tag\n"); goto fail; } /* Create tag for Rx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_rx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx ring DMA tag\n"); goto fail; } /* Create tag for Tx buffers. 
*/ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_TSO_MAXSIZE, /* maxsize */ MSK_MAXTXSEGS, /* nsegments */ MSK_TSO_MAXSGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_tx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx DMA tag\n"); goto fail; } rxalign = 1; /* * Workaround hardware hang which seems to happen when Rx buffer * is not aligned on multiple of FIFO word(8 bytes). */ if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) rxalign = MSK_RX_BUF_ALIGN; /* Create tag for Rx buffers. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ rxalign, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_rx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx DMA tag\n"); goto fail; } /* Allocate DMA'able memory and load the DMA map for Tx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_tx_ring_tag, (void **)&sc_if->msk_rdata.msk_tx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_tx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for Tx ring\n"); goto fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, sc_if->msk_rdata.msk_tx_ring, MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Tx ring\n"); goto fail; } sc_if->msk_rdata.msk_tx_ring_paddr = ctx.msk_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_rx_ring_tag, (void **)&sc_if->msk_rdata.msk_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_rx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for Rx ring\n"); goto fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, sc_if->msk_rdata.msk_rx_ring, MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Rx ring\n"); goto fail; } sc_if->msk_rdata.msk_rx_ring_paddr = ctx.msk_busaddr; /* Create DMA maps for Tx buffers. */ for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx dmamap\n"); goto fail; } } /* Create DMA maps for Rx buffers. 
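 * (A note on how these busdma tags nest: the per-interface parent tag
 * is created first, and the ring and buffer tags above are all children
 * of it, so they inherit its restrictions; e.g. a 32-bit lowaddr set on
 * the parent would constrain every ring and every mbuf map at once.
 * The maps themselves are then created from the leaf tags, as below.)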
*/ if ((error = bus_dmamap_create(sc_if->msk_cdata.msk_rx_tag, 0, &sc_if->msk_cdata.msk_rx_sparemap)) != 0) { device_printf(sc_if->msk_if_dev, "failed to create spare Rx dmamap\n"); goto fail; } for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx dmamap\n"); goto fail; } } fail: return (error); } static int msk_rx_dma_jalloc(struct msk_if_softc *sc_if) { struct msk_dmamap_arg ctx; struct msk_rxdesc *jrxd; bus_size_t rxalign; int error, i; if (jumbo_disable != 0 || (sc_if->msk_flags & MSK_FLAG_JUMBO) == 0) { sc_if->msk_flags &= ~MSK_FLAG_JUMBO; device_printf(sc_if->msk_if_dev, "disabling jumbo frame support\n"); return (0); } /* Create tag for jumbo Rx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_JUMBO_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_JUMBO_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_jumbo_rx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx ring DMA tag\n"); goto jumbo_fail; } rxalign = 1; /* * Workaround hardware hang which seems to happen when Rx buffer * is not aligned on multiple of FIFO word(8 bytes). */ if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) rxalign = MSK_RX_BUF_ALIGN; /* Create tag for jumbo Rx buffers. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ rxalign, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_jumbo_rx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx DMA tag\n"); goto jumbo_fail; } /* Allocate DMA'able memory and load the DMA map for jumbo Rx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, (void **)&sc_if->msk_rdata.msk_jumbo_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_jumbo_rx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for jumbo Rx ring\n"); goto jumbo_fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, sc_if->msk_rdata.msk_jumbo_rx_ring, MSK_JUMBO_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for jumbo Rx ring\n"); goto jumbo_fail; } sc_if->msk_rdata.msk_jumbo_rx_ring_paddr = ctx.msk_busaddr; /* Create DMA maps for jumbo Rx buffers. 
*/ if ((error = bus_dmamap_create(sc_if->msk_cdata.msk_jumbo_rx_tag, 0, &sc_if->msk_cdata.msk_jumbo_rx_sparemap)) != 0) { device_printf(sc_if->msk_if_dev, "failed to create spare jumbo Rx dmamap\n"); goto jumbo_fail; } for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; jrxd->rx_m = NULL; jrxd->rx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_jumbo_rx_tag, 0, &jrxd->rx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx dmamap\n"); goto jumbo_fail; } } return (0); jumbo_fail: msk_rx_dma_jfree(sc_if); device_printf(sc_if->msk_if_dev, "disabling jumbo frame support " "due to resource shortage\n"); sc_if->msk_flags &= ~MSK_FLAG_JUMBO; return (error); } static void msk_txrx_dma_free(struct msk_if_softc *sc_if) { struct msk_txdesc *txd; struct msk_rxdesc *rxd; int i; /* Tx ring. */ if (sc_if->msk_cdata.msk_tx_ring_tag) { if (sc_if->msk_rdata.msk_tx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map); if (sc_if->msk_rdata.msk_tx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_rdata.msk_tx_ring, sc_if->msk_cdata.msk_tx_ring_map); sc_if->msk_rdata.msk_tx_ring = NULL; sc_if->msk_rdata.msk_tx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_tx_ring_tag); sc_if->msk_cdata.msk_tx_ring_tag = NULL; } /* Rx ring. */ if (sc_if->msk_cdata.msk_rx_ring_tag) { if (sc_if->msk_rdata.msk_rx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map); if (sc_if->msk_rdata.msk_rx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_rdata.msk_rx_ring, sc_if->msk_cdata.msk_rx_ring_map); sc_if->msk_rdata.msk_rx_ring = NULL; sc_if->msk_rdata.msk_rx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_rx_ring_tag); sc_if->msk_cdata.msk_rx_ring_tag = NULL; } /* Tx buffers. */ if (sc_if->msk_cdata.msk_tx_tag) { for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); txd->tx_dmamap = NULL; } } bus_dma_tag_destroy(sc_if->msk_cdata.msk_tx_tag); sc_if->msk_cdata.msk_tx_tag = NULL; } /* Rx buffers. */ if (sc_if->msk_cdata.msk_rx_tag) { for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; if (rxd->rx_dmamap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = NULL; } } if (sc_if->msk_cdata.msk_rx_sparemap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_rx_tag, sc_if->msk_cdata.msk_rx_sparemap); sc_if->msk_cdata.msk_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->msk_cdata.msk_rx_tag); sc_if->msk_cdata.msk_rx_tag = NULL; } if (sc_if->msk_cdata.msk_parent_tag) { bus_dma_tag_destroy(sc_if->msk_cdata.msk_parent_tag); sc_if->msk_cdata.msk_parent_tag = NULL; } } static void msk_rx_dma_jfree(struct msk_if_softc *sc_if) { struct msk_rxdesc *jrxd; int i; /* Jumbo Rx ring. */ if (sc_if->msk_cdata.msk_jumbo_rx_ring_tag) { if (sc_if->msk_rdata.msk_jumbo_rx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map); if (sc_if->msk_rdata.msk_jumbo_rx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_rdata.msk_jumbo_rx_ring, sc_if->msk_cdata.msk_jumbo_rx_ring_map); sc_if->msk_rdata.msk_jumbo_rx_ring = NULL; sc_if->msk_rdata.msk_jumbo_rx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_jumbo_rx_ring_tag); sc_if->msk_cdata.msk_jumbo_rx_ring_tag = NULL; } /* Jumbo Rx buffers. 
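 * (Freed below in the canonical busdma order; generic names, for
 * illustration only:)
 */
#if 0	/* not compiled; canonical teardown order */
bus_dmamap_unload(tag, map);		/* 1: drop the bus mapping */
bus_dmamem_free(tag, vaddr, map);	/* 2: release the memory */
bus_dma_tag_destroy(tag);		/* 3: finally destroy the tag */
#endif
/*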
*/ if (sc_if->msk_cdata.msk_jumbo_rx_tag) { for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; if (jrxd->rx_dmamap) { bus_dmamap_destroy( sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap); jrxd->rx_dmamap = NULL; } } if (sc_if->msk_cdata.msk_jumbo_rx_sparemap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_jumbo_rx_tag, sc_if->msk_cdata.msk_jumbo_rx_sparemap); sc_if->msk_cdata.msk_jumbo_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->msk_cdata.msk_jumbo_rx_tag); sc_if->msk_cdata.msk_jumbo_rx_tag = NULL; } } static int msk_encap(struct msk_if_softc *sc_if, struct mbuf **m_head) { struct msk_txdesc *txd, *txd_last; struct msk_tx_desc *tx_le; struct mbuf *m; bus_dmamap_t map; bus_dma_segment_t txsegs[MSK_MAXTXSEGS]; uint32_t control, csum, prod, si; uint16_t offset, tcp_offset, tso_mtu; int error, i, nseg, tso; MSK_IF_LOCK_ASSERT(sc_if); tcp_offset = offset = 0; m = *m_head; if (((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && (m->m_pkthdr.csum_flags & MSK_CSUM_FEATURES) != 0) || ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (m->m_pkthdr.csum_flags & CSUM_TSO) != 0)) { /* * Since mbuf has no protocol specific structure information * in it we have to inspect protocol information here to * setup TSO and checksum offload. I don't know why Marvell * made a such decision in chip design because other GigE * hardwares normally takes care of all these chores in * hardware. However, TSO performance of Yukon II is very * good such that it's worth to implement it. */ struct ether_header *eh; struct ip *ip; struct tcphdr *tcp; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ m = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } offset = sizeof(struct ether_header); m = m_pullup(m, offset); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } eh = mtod(m, struct ether_header *); /* Check if hardware VLAN insertion is off. */ if (eh->ether_type == htons(ETHERTYPE_VLAN)) { offset = sizeof(struct ether_vlan_header); m = m_pullup(m, offset); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } } m = m_pullup(m, offset + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, char *) + offset); offset += (ip->ip_hl << 2); tcp_offset = offset; if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { m = m_pullup(m, offset + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } tcp = (struct tcphdr *)(mtod(m, char *) + offset); offset += (tcp->th_off << 2); } else if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && (m->m_pkthdr.len < MSK_MIN_FRAMELEN) && (m->m_pkthdr.csum_flags & CSUM_TCP) != 0) { /* * It seems that Yukon II has Tx checksum offload bug * for small TCP packets that's less than 60 bytes in * size (e.g. TCP window probe packet, pure ACK packet). * Common work around like padding with zeros to make * the frame minimum ethernet frame size didn't work at * all. * Instead of disabling checksum offload completely we * resort to S/W checksum routine when we encounter * short TCP frames. * Short UDP packets appear to be handled correctly by * Yukon II. Also I assume this bug does not happen on * controllers that use newer descriptor format or * automatic Tx checksum calculation. 
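 *
 * A sketch of where that software checksum lands, with the usual
 * offsets (csum_data for CSUM_TCP is offsetof(struct tcphdr, th_sum)):
 */
#if 0	/* not compiled; mirrors the fallback just below */
/*
 * Plain Ethernet + IP + TCP: offset = 14 + 20 = 34 is the start of the
 * TCP header, csum_data = 16, so the sum is stored at byte 34 + 16 = 50.
 */
*(uint16_t *)(m->m_data + offset + m->m_pkthdr.csum_data) =
    in_cksum_skip(m, m->m_pkthdr.len, offset);
m->m_pkthdr.csum_flags &= ~CSUM_TCP;	/* hardware need not redo it */
#endif
/*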
*/ m = m_pullup(m, offset + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *(uint16_t *)(m->m_data + offset + m->m_pkthdr.csum_data) = in_cksum_skip(m, m->m_pkthdr.len, offset); m->m_pkthdr.csum_flags &= ~CSUM_TCP; } *m_head = m; } prod = sc_if->msk_cdata.msk_tx_prod; txd = &sc_if->msk_cdata.msk_txdesc[prod]; txd_last = txd; map = txd->tx_dmamap; error = bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_tx_tag, map, *m_head, txsegs, &nseg, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(*m_head, M_NOWAIT, MSK_MAXTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_tx_tag, map, *m_head, txsegs, &nseg, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check number of available descriptors. */ if (sc_if->msk_cdata.msk_tx_cnt + nseg >= (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT)) { bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, map); return (ENOBUFS); } control = 0; tso = 0; tx_le = NULL; /* Check TSO support. */ if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) tso_mtu = m->m_pkthdr.tso_segsz; else tso_mtu = offset + m->m_pkthdr.tso_segsz; if (tso_mtu != sc_if->msk_cdata.msk_tso_mtu) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(tso_mtu); if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) tx_le->msk_control = htole32(OP_MSS | HW_OWNER); else tx_le->msk_control = htole32(OP_LRGLEN | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); sc_if->msk_cdata.msk_tso_mtu = tso_mtu; } tso++; } /* Check if we have a VLAN tag to insert. */ if ((m->m_flags & M_VLANTAG) != 0) { if (tx_le == NULL) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(0); tx_le->msk_control = htole32(OP_VLAN | HW_OWNER | htons(m->m_pkthdr.ether_vtag)); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } else { tx_le->msk_control |= htole32(OP_VLAN | htons(m->m_pkthdr.ether_vtag)); } control |= INS_VLAN; } /* Check if we have to handle checksum offload. */ if (tso == 0 && (m->m_pkthdr.csum_flags & MSK_CSUM_FEATURES) != 0) { if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) != 0) control |= CALSUM; else { control |= CALSUM | WR_SUM | INIT_SUM | LOCK_SUM; if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) control |= UDPTCP; /* Checksum write position. */ csum = (tcp_offset + m->m_pkthdr.csum_data) & 0xffff; /* Checksum start position. 
*/ csum |= (uint32_t)tcp_offset << 16; if (csum != sc_if->msk_cdata.msk_last_csum) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(csum); tx_le->msk_control = htole32(1 << 16 | (OP_TCPLISW | HW_OWNER)); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); sc_if->msk_cdata.msk_last_csum = csum; } } } #ifdef MSK_64BIT_DMA if (MSK_ADDR_HI(txsegs[0].ds_addr) != sc_if->msk_cdata.msk_tx_high_addr) { sc_if->msk_cdata.msk_tx_high_addr = MSK_ADDR_HI(txsegs[0].ds_addr); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[0].ds_addr)); tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } #endif si = prod; tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[0].ds_addr)); if (tso == 0) tx_le->msk_control = htole32(txsegs[0].ds_len | control | OP_PACKET); else tx_le->msk_control = htole32(txsegs[0].ds_len | control | OP_LARGESEND); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); for (i = 1; i < nseg; i++) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; #ifdef MSK_64BIT_DMA if (MSK_ADDR_HI(txsegs[i].ds_addr) != sc_if->msk_cdata.msk_tx_high_addr) { sc_if->msk_cdata.msk_tx_high_addr = MSK_ADDR_HI(txsegs[i].ds_addr); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[i].ds_addr)); tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; } #endif tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[i].ds_addr)); tx_le->msk_control = htole32(txsegs[i].ds_len | control | OP_BUFFER | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } /* Update producer index. */ sc_if->msk_cdata.msk_tx_prod = prod; /* Set EOP on the last descriptor. */ prod = (prod + MSK_TX_RING_CNT - 1) % MSK_TX_RING_CNT; tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_control |= htole32(EOP); /* Turn the first descriptor ownership to hardware. */ tx_le = &sc_if->msk_rdata.msk_tx_ring[si]; tx_le->msk_control |= htole32(HW_OWNER); txd = &sc_if->msk_cdata.msk_txdesc[prod]; map = txd_last->tx_dmamap; txd_last->tx_dmamap = txd->tx_dmamap; txd->tx_dmamap = map; txd->tx_m = m; /* Sync descriptors. */ bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } static void msk_start(struct ifnet *ifp) { struct msk_if_softc *sc_if; sc_if = ifp->if_softc; MSK_IF_LOCK(sc_if); msk_start_locked(ifp); MSK_IF_UNLOCK(sc_if); } static void msk_start_locked(struct ifnet *ifp) { struct msk_if_softc *sc_if; struct mbuf *m_head; int enq; sc_if = ifp->if_softc; MSK_IF_LOCK_ASSERT(sc_if); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc_if->msk_flags & MSK_FLAG_LINK) == 0) return; for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc_if->msk_cdata.msk_tx_cnt < (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT); ) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. 
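 *
 * msk_encap() reports "no room" before the ring can overfill; a
 * reserve is kept because a packet may need prefix LEs (64-bit
 * address, MSS, VLAN tag, checksum) beyond its data segments.
 * Condensed from msk_encap() above:
 */
#if 0	/* not compiled; condensed from msk_encap() */
if (sc_if->msk_cdata.msk_tx_cnt + nseg >=
    (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT)) {
	bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, map);
	return (ENOBUFS);	/* caller prepends the mbuf, sets OACTIVE */
}
#endif
/*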
*/ if (msk_encap(sc_if, &m_head) != 0) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } if (enq > 0) { /* Transmit */ CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_txq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_tx_prod); /* Set a timeout in case the chip goes out to lunch. */ sc_if->msk_watchdog_timer = MSK_TX_TIMEOUT; } } static void msk_watchdog(struct msk_if_softc *sc_if) { struct ifnet *ifp; MSK_IF_LOCK_ASSERT(sc_if); if (sc_if->msk_watchdog_timer == 0 || --sc_if->msk_watchdog_timer) return; ifp = sc_if->msk_ifp; if ((sc_if->msk_flags & MSK_FLAG_LINK) == 0) { if (bootverbose) if_printf(sc_if->msk_ifp, "watchdog timeout " "(missed link)\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); return; } if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) msk_start_locked(ifp); } static int mskc_shutdown(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && ((sc->msk_if[i]->msk_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)) msk_stop(sc->msk_if[i]); } MSK_UNLOCK(sc); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); return (0); } static int mskc_suspend(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && ((sc->msk_if[i]->msk_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)) msk_stop(sc->msk_if[i]); } /* Disable all interrupts. */ CSR_WRITE_4(sc, B0_IMSK, 0); CSR_READ_4(sc, B0_IMSK); CSR_WRITE_4(sc, B0_HWE_IMSK, 0); CSR_READ_4(sc, B0_HWE_IMSK); msk_phy_power(sc, MSK_PHY_POWERDOWN); /* Put hardware reset. 
*/ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); sc->msk_pflags |= MSK_FLAG_SUSPEND; MSK_UNLOCK(sc); return (0); } static int mskc_resume(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, 0); mskc_reset(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && ((sc->msk_if[i]->msk_ifp->if_flags & IFF_UP) != 0)) { sc->msk_if[i]->msk_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc->msk_if[i]); } } sc->msk_pflags &= ~MSK_FLAG_SUSPEND; MSK_UNLOCK(sc); return (0); } #ifndef __NO_STRICT_ALIGNMENT static __inline void msk_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - 3; for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= (MSK_RX_BUF_ALIGN - ETHER_ALIGN); } #endif static __inline void msk_rxcsum(struct msk_if_softc *sc_if, uint32_t control, struct mbuf *m) { struct ether_header *eh; struct ip *ip; struct udphdr *uh; int32_t hlen, len, pktlen, temp32; uint16_t csum, *opts; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) { if ((control & (CSS_IPV4 | CSS_IPFRAG)) == CSS_IPV4) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((control & CSS_IPV4_CSUM_OK) != 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((control & (CSS_TCP | CSS_UDP)) != 0 && (control & (CSS_TCPUDP_CSUM_OK)) != 0) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } return; } /* * Marvell Yukon controllers that support OP_RXCHKS has known * to have various Rx checksum offloading bugs. These * controllers can be configured to compute simple checksum * at two different positions. So we can compute IP and TCP/UDP * checksum at the same time. We intentionally have controller * compute TCP/UDP checksum twice by specifying the same * checksum start position and compare the result. If the value * is different it would indicate the hardware logic was wrong. */ if ((sc_if->msk_csum & 0xFFFF) != (sc_if->msk_csum >> 16)) { if (bootverbose) device_printf(sc_if->msk_if_dev, "Rx checksum value mismatch!\n"); return; } pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) return; /* can't handle fragmented packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (pktlen < (hlen + sizeof(struct tcphdr))) return; break; case IPPROTO_UDP: if (pktlen < (hlen + sizeof(struct udphdr))) return; uh = (struct udphdr *)((caddr_t)ip + hlen); if (uh->uh_sum == 0) return; /* no checksum */ break; default: return; } csum = bswap16(sc_if->msk_csum & 0xFFFF); /* Checksum fixup for IP options. 
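 *
 * The loop below backs the IP option words out of the hardware sum
 * with one's-complement subtraction; the fold step, sketched with
 * illustrative names:
 */
#if 0	/* not compiled; arithmetic only */
int32_t t = csum - *opts;	/* subtraction may borrow */
t = (t >> 16) + (t & 0xffff);	/* fold the borrow back in ... */
csum = t & 0xffff;		/* ... keeping a 16-bit 1's-complement sum */
#endif
/*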
*/ len = hlen - sizeof(struct ip); if (len > 0) { opts = (uint16_t *)(ip + 1); for (; len > 0; len -= sizeof(uint16_t), opts++) { temp32 = csum - *opts; temp32 = (temp32 >> 16) + (temp32 & 65535); csum = temp32 & 65535; } } m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; m->m_pkthdr.csum_data = csum; } static void msk_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, int len) { struct mbuf *m; struct ifnet *ifp; struct msk_rxdesc *rxd; int cons, rxlen; ifp = sc_if->msk_ifp; MSK_IF_LOCK_ASSERT(sc_if); cons = sc_if->msk_cdata.msk_rx_cons; do { rxlen = status >> 16; if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) rxlen -= ETHER_VLAN_ENCAP_LEN; if ((sc_if->msk_flags & MSK_FLAG_NORXCHK) != 0) { /* * For controllers that returns bogus status code * just do minimal check and let upper stack * handle this frame. */ if (len > MSK_MAX_FRAMELEN || len < ETHER_HDR_LEN) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_rxbuf(sc_if, cons); break; } } else if (len > sc_if->msk_framesize || ((status & GMR_FS_ANY_ERR) != 0) || ((status & GMR_FS_RX_OK) == 0) || (rxlen != len)) { /* Don't count flow-control packet as errors. */ if ((status & GMR_FS_GOOD_FC) == 0) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_rxbuf(sc_if, cons); break; } #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[(cons + 1) % MSK_RX_RING_CNT]; #else rxd = &sc_if->msk_cdata.msk_rxdesc[cons]; #endif m = rxd->rx_m; if (msk_newbuf(sc_if, cons) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse old buffer. */ msk_discard_rxbuf(sc_if, cons); break; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; #ifndef __NO_STRICT_ALIGNMENT if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) msk_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m->m_pkthdr.ether_vtag = sc_if->msk_vtag; m->m_flags |= M_VLANTAG; } MSK_IF_UNLOCK(sc_if); (*ifp->if_input)(ifp, m); MSK_IF_LOCK(sc_if); } while (0); MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT); } static void msk_jumbo_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, int len) { struct mbuf *m; struct ifnet *ifp; struct msk_rxdesc *jrxd; int cons, rxlen; ifp = sc_if->msk_ifp; MSK_IF_LOCK_ASSERT(sc_if); cons = sc_if->msk_cdata.msk_rx_cons; do { rxlen = status >> 16; if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) rxlen -= ETHER_VLAN_ENCAP_LEN; if (len > sc_if->msk_framesize || ((status & GMR_FS_ANY_ERR) != 0) || ((status & GMR_FS_RX_OK) == 0) || (rxlen != len)) { /* Don't count flow-control packet as errors. */ if ((status & GMR_FS_GOOD_FC) == 0) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_jumbo_rxbuf(sc_if, cons); break; } #ifdef MSK_64BIT_DMA jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[(cons + 1) % MSK_JUMBO_RX_RING_CNT]; #else jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[cons]; #endif m = jrxd->rx_m; if (msk_jumbo_newbuf(sc_if, cons) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse old buffer. 
*/ msk_discard_jumbo_rxbuf(sc_if, cons); break; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; #ifndef __NO_STRICT_ALIGNMENT if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) msk_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m->m_pkthdr.ether_vtag = sc_if->msk_vtag; m->m_flags |= M_VLANTAG; } MSK_IF_UNLOCK(sc_if); (*ifp->if_input)(ifp, m); MSK_IF_LOCK(sc_if); } while (0); MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT); } static void msk_txeof(struct msk_if_softc *sc_if, int idx) { struct msk_txdesc *txd; struct msk_tx_desc *cur_tx; struct ifnet *ifp; uint32_t control; int cons, prog; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ cons = sc_if->msk_cdata.msk_tx_cons; prog = 0; for (; cons != idx; MSK_INC(cons, MSK_TX_RING_CNT)) { if (sc_if->msk_cdata.msk_tx_cnt <= 0) break; prog++; cur_tx = &sc_if->msk_rdata.msk_tx_ring[cons]; control = le32toh(cur_tx->msk_control); sc_if->msk_cdata.msk_tx_cnt--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if ((control & EOP) == 0) continue; txd = &sc_if->msk_cdata.msk_txdesc[cons]; bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); KASSERT(txd->tx_m != NULL, ("%s: freeing NULL mbuf!", __func__)); m_freem(txd->tx_m); txd->tx_m = NULL; } if (prog > 0) { sc_if->msk_cdata.msk_tx_cons = cons; if (sc_if->msk_cdata.msk_tx_cnt == 0) sc_if->msk_watchdog_timer = 0; /* No need to sync LEs as we didn't update LEs. */ } } static void msk_tick(void *xsc_if) { + struct epoch_tracker et; struct msk_if_softc *sc_if; struct mii_data *mii; sc_if = xsc_if; MSK_IF_LOCK_ASSERT(sc_if); mii = device_get_softc(sc_if->msk_miibus); mii_tick(mii); if ((sc_if->msk_flags & MSK_FLAG_LINK) == 0) msk_miibus_statchg(sc_if->msk_if_dev); + NET_EPOCH_ENTER(et); msk_handle_events(sc_if->msk_softc); + NET_EPOCH_EXIT(et); msk_watchdog(sc_if); callout_reset(&sc_if->msk_tick_ch, hz, msk_tick, sc_if); } static void msk_intr_phy(struct msk_if_softc *sc_if) { uint16_t status; msk_phy_readreg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_STAT); status = msk_phy_readreg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_STAT); /* Handle FIFO Underrun/Overflow? */ if ((status & PHY_M_IS_FIFO_ERROR)) device_printf(sc_if->msk_if_dev, "PHY FIFO underrun/overflow.\n"); } static void msk_intr_gmac(struct msk_if_softc *sc_if) { struct msk_softc *sc; uint8_t status; sc = sc_if->msk_softc; status = CSR_READ_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_SRC)); /* GMAC Rx FIFO overrun. */ if ((status & GM_IS_RX_FF_OR) != 0) CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_CLI_RX_FO); /* GMAC Tx FIFO underrun. */ if ((status & GM_IS_TX_FF_UR) != 0) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_CLI_TX_FU); device_printf(sc_if->msk_if_dev, "Tx FIFO underrun!\n"); /* * XXX * In case of Tx underrun, we may need to flush/reset * Tx MAC but that would also require resynchronization * with status LEs. 
Reinitializing status LEs would * affect the other port in a dual MAC configuration, so it * should be avoided as much as possible. * Due to lack of documentation it is all vague guesswork, * and it needs more investigation. */ } } static void msk_handle_hwerr(struct msk_if_softc *sc_if, uint32_t status) { struct msk_softc *sc; sc = sc_if->msk_softc; if ((status & Y2_IS_PAR_RD1) != 0) { device_printf(sc_if->msk_if_dev, "RAM buffer read parity error\n"); /* Clear IRQ. */ CSR_WRITE_2(sc, SELECT_RAM_BUFFER(sc_if->msk_port, B3_RI_CTRL), RI_CLR_RD_PERR); } if ((status & Y2_IS_PAR_WR1) != 0) { device_printf(sc_if->msk_if_dev, "RAM buffer write parity error\n"); /* Clear IRQ. */ CSR_WRITE_2(sc, SELECT_RAM_BUFFER(sc_if->msk_port, B3_RI_CTRL), RI_CLR_WR_PERR); } if ((status & Y2_IS_PAR_MAC1) != 0) { device_printf(sc_if->msk_if_dev, "Tx MAC parity error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_CLI_TX_PE); } if ((status & Y2_IS_PAR_RX1) != 0) { device_printf(sc_if->msk_if_dev, "Rx parity error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_CLR_IRQ_PAR); } if ((status & (Y2_IS_TCP_TXS1 | Y2_IS_TCP_TXA1)) != 0) { device_printf(sc_if->msk_if_dev, "TCP segmentation error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_CLR_IRQ_TCP); } } static void msk_intr_hwerr(struct msk_softc *sc) { uint32_t status; uint32_t tlphead[4]; status = CSR_READ_4(sc, B0_HWE_ISRC); /* Time Stamp timer overflow. */ if ((status & Y2_IS_TIST_OV) != 0) CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ); if ((status & Y2_IS_PCI_NEXP) != 0) { /* * A PCI Express error occurred which is not described in the * PEX spec. * This error is also mapped to either the Master Abort * (Y2_IS_MST_ERR) or Target Abort (Y2_IS_IRQ_STAT) bit and * can only be cleared there. */ device_printf(sc->msk_dev, "PCI Express protocol violation error\n"); } if ((status & (Y2_IS_MST_ERR | Y2_IS_IRQ_STAT)) != 0) { uint16_t v16; if ((status & Y2_IS_MST_ERR) != 0) device_printf(sc->msk_dev, "unexpected IRQ Master error\n"); else device_printf(sc->msk_dev, "unexpected IRQ Status error\n"); /* Reset all bits in the PCI status register. */ v16 = pci_read_config(sc->msk_dev, PCIR_STATUS, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); pci_write_config(sc->msk_dev, PCIR_STATUS, v16 | PCIM_STATUS_PERR | PCIM_STATUS_SERR | PCIM_STATUS_RMABORT | PCIM_STATUS_RTABORT | PCIM_STATUS_MDPERR, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } /* Check for PCI Express Uncorrectable Error. */ if ((status & Y2_IS_PCI_EXP) != 0) { uint32_t v32; /* * On a PCI Express bus, bridges are called root complexes (RC). * PCI Express errors are recognized by the root complex too, * which requests the system to handle the problem. After an * error occurs, it may no longer be possible to access the * adapter. */ v32 = CSR_PCI_READ_4(sc, PEX_UNC_ERR_STAT); if ((v32 & PEX_UNSUP_REQ) != 0) { /* Ignore unsupported request error. */ device_printf(sc->msk_dev, "Uncorrectable PCI Express error\n"); } if ((v32 & (PEX_FATAL_ERRORS | PEX_POIS_TLP)) != 0) { int i; /* Get TLP header from Log Registers. */ for (i = 0; i < 4; i++) tlphead[i] = CSR_PCI_READ_4(sc, PEX_HEADER_LOG + i * 4); /* Check for vendor-defined broadcast message. */ if (!(tlphead[0] == 0x73004001 && tlphead[1] == 0x7f)) { sc->msk_intrhwemask &= ~Y2_IS_PCI_EXP; CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); } } /* Clear the interrupt. 
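*/

The clear sequence that follows uses the chip's test-control gate: PCI-mapped registers are write-protected until TST_CFG_WRITE_ON is set, and protection is restored with TST_CFG_WRITE_OFF. A hedged sketch of that unlock/write/lock pattern factored into a helper; msk_pci_csr_write4() is hypothetical and not part of this driver:

/* Minimal sketch of the protected-register write pattern, assuming
 * the CSR_WRITE_1/CSR_PCI_WRITE_4 accessors and the B2_TST_CTRL1
 * gate defined by this driver. */
static void
msk_pci_csr_write4(struct msk_softc *sc, int reg, uint32_t val)
{
	/* Drop write protection on PCI-mapped registers. */
	CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON);
	CSR_PCI_WRITE_4(sc, reg, val);
	/* Restore write protection immediately afterwards. */
	CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
}

/*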
*/ CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); CSR_PCI_WRITE_4(sc, PEX_UNC_ERR_STAT, 0xffffffff); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } if ((status & Y2_HWE_L1_MASK) != 0 && sc->msk_if[MSK_PORT_A] != NULL) msk_handle_hwerr(sc->msk_if[MSK_PORT_A], status); if ((status & Y2_HWE_L2_MASK) != 0 && sc->msk_if[MSK_PORT_B] != NULL) msk_handle_hwerr(sc->msk_if[MSK_PORT_B], status >> 8); } static __inline void msk_rxput(struct msk_if_softc *sc_if) { struct msk_softc *sc; sc = sc_if->msk_softc; if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) bus_dmamap_sync( sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); else bus_dmamap_sync( sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_2(sc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); } static int msk_handle_events(struct msk_softc *sc) { struct msk_if_softc *sc_if; int rxput[2]; struct msk_stat_desc *sd; uint32_t control, status; int cons, len, port, rxprog; if (sc->msk_stat_cons == CSR_READ_2(sc, STAT_PUT_IDX)) return (0); /* Sync status LEs. */ bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rxput[MSK_PORT_A] = rxput[MSK_PORT_B] = 0; rxprog = 0; cons = sc->msk_stat_cons; for (;;) { sd = &sc->msk_stat_ring[cons]; control = le32toh(sd->msk_control); if ((control & HW_OWNER) == 0) break; control &= ~HW_OWNER; sd->msk_control = htole32(control); status = le32toh(sd->msk_status); len = control & STLE_LEN_MASK; port = (control >> 16) & 0x01; sc_if = sc->msk_if[port]; if (sc_if == NULL) { device_printf(sc->msk_dev, "invalid port opcode " "0x%08x\n", control & STLE_OP_MASK); continue; } switch (control & STLE_OP_MASK) { case OP_RXVLAN: sc_if->msk_vtag = ntohs(len); break; case OP_RXCHKSVLAN: sc_if->msk_vtag = ntohs(len); /* FALLTHROUGH */ case OP_RXCHKS: sc_if->msk_csum = status; break; case OP_RXSTAT: if (!(sc_if->msk_ifp->if_drv_flags & IFF_DRV_RUNNING)) break; if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) msk_jumbo_rxeof(sc_if, status, control, len); else msk_rxeof(sc_if, status, control, len); rxprog++; /* * Because there is no way to sync a single Rx LE, * put off the DMA sync operation until the end of * event processing. */ rxput[port]++; /* Update the prefetch unit if we've passed the watermark. */ if (rxput[port] >= sc_if->msk_cdata.msk_rx_putwm) { msk_rxput(sc_if); rxput[port] = 0; } break; case OP_TXINDEXLE: if (sc->msk_if[MSK_PORT_A] != NULL) msk_txeof(sc->msk_if[MSK_PORT_A], status & STLE_TXA1_MSKL); if (sc->msk_if[MSK_PORT_B] != NULL) msk_txeof(sc->msk_if[MSK_PORT_B], ((status & STLE_TXA2_MSKL) >> STLE_TXA2_SHIFTL) | ((len & STLE_TXA2_MSKH) << STLE_TXA2_SHIFTH)); break; default: device_printf(sc->msk_dev, "unhandled opcode 0x%08x\n", control & STLE_OP_MASK); break; } MSK_INC(cons, sc->msk_stat_count); if (rxprog > sc->msk_process_limit) break; } sc->msk_stat_cons = cons; bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (rxput[MSK_PORT_A] > 0) msk_rxput(sc->msk_if[MSK_PORT_A]); if (rxput[MSK_PORT_B] > 0) msk_rxput(sc->msk_if[MSK_PORT_B]); return (sc->msk_stat_cons != CSR_READ_2(sc, STAT_PUT_IDX)); } static void msk_intr(void *xsc) { struct msk_softc *sc; struct msk_if_softc *sc_if0, *sc_if1; struct ifnet *ifp0, *ifp1; uint32_t status; int domore; sc = xsc; MSK_LOCK(sc); /* Reading B0_Y2_SP_ISRC2 masks further interrupts. 
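*/

As the comment notes, the read of B0_Y2_SP_ISRC2 has a side effect: it masks further interrupts from the chip until software writes 2 to B0_Y2_SP_ICR. A reduced skeleton of that claim/service/re-arm shape, with locking and status decoding omitted; isr_skeleton() is illustrative only:

static void
isr_skeleton(struct msk_softc *sc)
{
	uint32_t status;

	/* The read side effect masks further device interrupts. */
	status = CSR_READ_4(sc, B0_Y2_SP_ISRC2);
	if (status == 0 || status == 0xffffffff) {
		/* Spurious, or hardware gone: just re-arm and leave. */
		CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2);
		return;
	}

	/* ... decode 'status' and service each source here ... */

	/* Writing 2 re-enables interrupt generation. */
	CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2);
}

/*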
*/ status = CSR_READ_4(sc, B0_Y2_SP_ISRC2); if (status == 0 || status == 0xffffffff || (sc->msk_pflags & MSK_FLAG_SUSPEND) != 0 || (status & sc->msk_intrmask) == 0) { CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2); MSK_UNLOCK(sc); return; } sc_if0 = sc->msk_if[MSK_PORT_A]; sc_if1 = sc->msk_if[MSK_PORT_B]; ifp0 = ifp1 = NULL; if (sc_if0 != NULL) ifp0 = sc_if0->msk_ifp; if (sc_if1 != NULL) ifp1 = sc_if1->msk_ifp; if ((status & Y2_IS_IRQ_PHY1) != 0 && sc_if0 != NULL) msk_intr_phy(sc_if0); if ((status & Y2_IS_IRQ_PHY2) != 0 && sc_if1 != NULL) msk_intr_phy(sc_if1); if ((status & Y2_IS_IRQ_MAC1) != 0 && sc_if0 != NULL) msk_intr_gmac(sc_if0); if ((status & Y2_IS_IRQ_MAC2) != 0 && sc_if1 != NULL) msk_intr_gmac(sc_if1); if ((status & (Y2_IS_CHK_RX1 | Y2_IS_CHK_RX2)) != 0) { device_printf(sc->msk_dev, "Rx descriptor error\n"); sc->msk_intrmask &= ~(Y2_IS_CHK_RX1 | Y2_IS_CHK_RX2); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); } if ((status & (Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXA2)) != 0) { device_printf(sc->msk_dev, "Tx descriptor error\n"); sc->msk_intrmask &= ~(Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXA2); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); } if ((status & Y2_IS_HW_ERR) != 0) msk_intr_hwerr(sc); domore = msk_handle_events(sc); if ((status & Y2_IS_STAT_BMU) != 0 && domore == 0) CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_CLR_IRQ); /* Reenable interrupts. */ CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2); if (ifp0 != NULL && (ifp0->if_drv_flags & IFF_DRV_RUNNING) != 0 && !IFQ_DRV_IS_EMPTY(&ifp0->if_snd)) msk_start_locked(ifp0); if (ifp1 != NULL && (ifp1->if_drv_flags & IFF_DRV_RUNNING) != 0 && !IFQ_DRV_IS_EMPTY(&ifp1->if_snd)) msk_start_locked(ifp1); MSK_UNLOCK(sc); } static void msk_set_tx_stfwd(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; ifp = sc_if->msk_ifp; sc = sc_if->msk_softc; if ((sc->msk_hw_id == CHIP_ID_YUKON_EX && sc->msk_hw_rev != CHIP_REV_YU_EX_A0) || sc->msk_hw_id >= CHIP_ID_YUKON_SUPR) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_ENA); } else { if (ifp->if_mtu > ETHERMTU) { /* Set Tx GMAC FIFO Almost Empty Threshold. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_AE_THR), MSK_ECU_JUMBO_WM << 16 | MSK_ECU_AE_THR); /* Disable Store & Forward mode for Tx. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_DIS); } else { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_ENA); } } } static void msk_init(void *xsc) { struct msk_if_softc *sc_if = xsc; MSK_IF_LOCK(sc_if); msk_init_locked(sc_if); MSK_IF_UNLOCK(sc_if); } static void msk_init_locked(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; struct mii_data *mii; uint8_t *eaddr; uint16_t gmac; uint32_t reg; int error; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; sc = sc_if->msk_softc; mii = device_get_softc(sc_if->msk_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; error = 0; /* Cancel pending I/O and free all Rx/Tx buffers. */ msk_stop(sc_if); if (ifp->if_mtu < ETHERMTU) sc_if->msk_framesize = ETHERMTU; else sc_if->msk_framesize = ifp->if_mtu; sc_if->msk_framesize += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; if (ifp->if_mtu > ETHERMTU && (sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { ifp->if_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); } /* GMAC Control reset. 
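*/

The GMAC reset that follows is the usual set-then-clear pulse: assert the reset bit, then release it, through the same control register. A small sketch of the pattern as a helper; msk_reset_pulse() is hypothetical, not driver API:

/* Pulse a SET/CLR style reset, as done for GMAC_CTRL just below. */
static void
msk_reset_pulse(struct msk_softc *sc, int port, int reg,
    uint32_t set_bit, uint32_t clr_bit)
{
	CSR_WRITE_4(sc, MR_ADDR(port, reg), set_bit);	/* assert reset */
	CSR_WRITE_4(sc, MR_ADDR(port, reg), clr_bit);	/* release reset */
}

msk_reset_pulse(sc, sc_if->msk_port, GMAC_CTRL, GMC_RST_SET, GMC_RST_CLR) would then express the first two writes below.

/*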
*/ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_F_LOOPB_OFF); if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_BYP_MACSECRX_ON | GMC_BYP_MACSECTX_ON | GMC_BYP_RETR_ON); /* * Initialize GMAC first such that speed/duplex/flow-control * parameters are renegotiated when interface is brought up. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, 0); /* Dummy read the Interrupt Source Register. */ CSR_READ_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_SRC)); /* Clear MIB stats. */ msk_stats_clear(sc_if); /* Disable FCS. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_CTRL, GM_RXCR_CRC_DIS); /* Setup Transmit Control Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_CTRL, TX_COL_THR(TX_COL_DEF)); /* Setup Transmit Flow Control Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_FLOW_CTRL, 0xffff); /* Setup Transmit Parameter Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_PARAM, TX_JAM_LEN_VAL(TX_JAM_LEN_DEF) | TX_JAM_IPG_VAL(TX_JAM_IPG_DEF) | TX_IPG_JAM_DATA(TX_IPG_JAM_DEF) | TX_BACK_OFF_LIM(TX_BOF_LIM_DEF)); gmac = DATA_BLIND_VAL(DATA_BLIND_DEF) | GM_SMOD_VLAN_ENA | IPG_DATA_VAL(IPG_DATA_DEF); if (ifp->if_mtu > ETHERMTU) gmac |= GM_SMOD_JUMBO_ENA; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SERIAL_MODE, gmac); /* Set station address. */ eaddr = IF_LLADDR(ifp); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1L, eaddr[0] | (eaddr[1] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1M, eaddr[2] | (eaddr[3] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1H, eaddr[4] | (eaddr[5] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2L, eaddr[0] | (eaddr[1] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2M, eaddr[2] | (eaddr[3] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2H, eaddr[4] | (eaddr[5] << 8)); /* Disable interrupts for counter overflows. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_IRQ_MSK, 0); GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_IRQ_MSK, 0); GMAC_WRITE_2(sc, sc_if->msk_port, GM_TR_IRQ_MSK, 0); /* Configure Rx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_CLR); reg = GMF_OPER_ON | GMF_RX_F_FL_ON; if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P || sc->msk_hw_id == CHIP_ID_YUKON_EX) reg |= GMF_RX_OVER_ON; CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), reg); /* Set receive filter. */ msk_rxfilter(sc_if); if (sc->msk_hw_id == CHIP_ID_YUKON_XL) { /* Clear flush mask - HW bug. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_MSK), 0); } else { /* Flush Rx MAC FIFO on any flow control or error. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_MSK), GMR_FS_ANY_ERR); } /* * Set Rx FIFO flush threshold to 64 bytes + 1 FIFO word * due to hardware hang on receipt of pause frames. */ reg = RX_GMF_FL_THR_DEF + 1; /* Another magic for Yukon FE+ - From Linux. */ if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) reg = 0x178; CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_THR), reg); /* Configure Tx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_OPER_ON); /* Configure hardware VLAN tag insertion/stripping. 
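*/

The station address writes above pack the 6-byte Ethernet address into three 16-bit registers, low byte first. A standalone worked example of the same packing; the address and output are only sample values:

#include <stdint.h>
#include <stdio.h>

/* Pack a 6-byte Ethernet address into the three 16-bit values the
 * GM_SRC_ADDR_* registers expect, mirroring the
 * eaddr[0] | (eaddr[1] << 8) expressions above. */
int
main(void)
{
	const uint8_t eaddr[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint16_t regs[3];
	int i;

	for (i = 0; i < 3; i++)
		regs[i] = eaddr[2 * i] | (eaddr[2 * i + 1] << 8);

	/* 00:11:22:33:44:55 -> 1100 3322 5544 */
	printf("%04x %04x %04x\n", regs[0], regs[1], regs[2]);
	return (0);
}

/*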
*/ msk_setvlan(sc_if, ifp); if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) { /* Set Rx Pause threshold. */ CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_LP_THR), MSK_ECU_LLPP); CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_UP_THR), MSK_ECU_ULPP); /* Configure store-and-forward for Tx. */ msk_set_tx_stfwd(sc_if); } if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { /* Disable dynamic watermark - from Linux. */ reg = CSR_READ_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA)); reg &= ~0x03; CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA), reg); } /* * Disable Force Sync bit and Alloc bit in Tx RAM interface * arbiter as we don't use Sync Tx queue. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_DIS_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC); /* Enable the RAM Interface Arbiter. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_ENA_ARB); /* Setup RAM buffer. */ msk_set_rambuffer(sc_if); /* Disable Tx sync Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txsq, RB_CTRL), RB_RST_SET); /* Setup Tx Queue Bus Memory Interface. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_CLR_RESET); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_OPER_INIT); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_FIFO_OP_ON); CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_txq, Q_WM), MSK_BMU_TX_WM); switch (sc->msk_hw_id) { case CHIP_ID_YUKON_EC_U: if (sc->msk_hw_rev == CHIP_REV_YU_EC_U_A0) { /* Fix for Yukon-EC Ultra: set BMU FIFO level */ CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_txq, Q_AL), MSK_ECU_TXFF_LEV); } break; case CHIP_ID_YUKON_EX: /* * Yukon Extreme seems to have silicon bug for * automatic Tx checksum calculation capability. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_B0) CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_F), F_TX_CHK_AUTO_OFF); break; } /* Setup Rx Queue Bus Memory Interface. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_CLR_RESET); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_OPER_INIT); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_FIFO_OP_ON); CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_rxq, Q_WM), MSK_BMU_RX_WM); if (sc->msk_hw_id == CHIP_ID_YUKON_EC_U && sc->msk_hw_rev >= CHIP_REV_YU_EC_U_A1) { /* MAC Rx RAM Read is controlled by hardware. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_F), F_M_RX_RAM_DIS); } msk_set_prefetch(sc, sc_if->msk_txq, sc_if->msk_rdata.msk_tx_ring_paddr, MSK_TX_RING_CNT - 1); msk_init_tx_ring(sc_if); /* Disable Rx checksum offload and RSS hash. */ reg = BMU_DIS_RX_RSS_HASH; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (ifp->if_capenable & IFCAP_RXCSUM) != 0) reg |= BMU_ENA_RX_CHKSUM; else reg |= BMU_DIS_RX_CHKSUM; CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), reg); if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) { msk_set_prefetch(sc, sc_if->msk_rxq, sc_if->msk_rdata.msk_jumbo_rx_ring_paddr, MSK_JUMBO_RX_RING_CNT - 1); error = msk_init_jumbo_rx_ring(sc_if); } else { msk_set_prefetch(sc, sc_if->msk_rxq, sc_if->msk_rdata.msk_rx_ring_paddr, MSK_RX_RING_CNT - 1); error = msk_init_rx_ring(sc_if); } if (error != 0) { device_printf(sc_if->msk_if_dev, "initialization failed: no memory for Rx buffers\n"); msk_stop(sc_if); return; } if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) { /* Disable flushing of non-ASF packets. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RX_MACSEC_FLUSH_OFF); } /* Configure interrupt handling. 
*/ if (sc_if->msk_port == MSK_PORT_A) { sc->msk_intrmask |= Y2_IS_PORT_A; sc->msk_intrhwemask |= Y2_HWE_L1_MASK; } else { sc->msk_intrmask |= Y2_IS_PORT_B; sc->msk_intrhwemask |= Y2_HWE_L2_MASK; } /* Configure IRQ moderation mask. */ CSR_WRITE_4(sc, B2_IRQM_MSK, sc->msk_intrmask); if (sc->msk_int_holdoff > 0) { /* Configure initial IRQ moderation timer value. */ CSR_WRITE_4(sc, B2_IRQM_INI, MSK_USECS(sc, sc->msk_int_holdoff)); CSR_WRITE_4(sc, B2_IRQM_VAL, MSK_USECS(sc, sc->msk_int_holdoff)); /* Start IRQ moderation. */ CSR_WRITE_1(sc, B2_IRQM_CTRL, TIM_START); } CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc_if->msk_flags &= ~MSK_FLAG_LINK; mii_mediachg(mii); callout_reset(&sc_if->msk_tick_ch, hz, msk_tick, sc_if); } static void msk_set_rambuffer(struct msk_if_softc *sc_if) { struct msk_softc *sc; int ltpp, utpp; sc = sc_if->msk_softc; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) return; /* Setup Rx Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_RST_CLR); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_START), sc->msk_rxqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_END), sc->msk_rxqend[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_WP), sc->msk_rxqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RP), sc->msk_rxqstart[sc_if->msk_port] / 8); utpp = (sc->msk_rxqend[sc_if->msk_port] + 1 - sc->msk_rxqstart[sc_if->msk_port] - MSK_RB_ULPP) / 8; ltpp = (sc->msk_rxqend[sc_if->msk_port] + 1 - sc->msk_rxqstart[sc_if->msk_port] - MSK_RB_LLPP_B) / 8; if (sc->msk_rxqsize < MSK_MIN_RXQ_SIZE) ltpp += (MSK_RB_LLPP_B - MSK_RB_LLPP_S) / 8; CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RX_UTPP), utpp); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RX_LTPP), ltpp); /* Set Rx priority(RB_RX_UTHP/RB_RX_LTHP) thresholds? */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_ENA_OP_MD); CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL)); /* Setup Tx Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_CLR); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_START), sc->msk_txqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_END), sc->msk_txqend[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_WP), sc->msk_txqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_RP), sc->msk_txqstart[sc_if->msk_port] / 8); /* Enable Store & Forward for Tx side. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_ENA_STFWD); CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_ENA_OP_MD); CSR_READ_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL)); } static void msk_set_prefetch(struct msk_softc *sc, int qaddr, bus_addr_t addr, uint32_t count) { /* Reset the prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_CLR); /* Set LE base address. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_ADDR_LOW_REG), MSK_ADDR_LO(addr)); CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_ADDR_HI_REG), MSK_ADDR_HI(addr)); /* Set the list last index. */ CSR_WRITE_2(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_LAST_IDX_REG), count); /* Turn on prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_OP_ON); /* Dummy read to ensure write. 
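*/

The dummy read below is the usual way to flush posted writes: host bridges may buffer CSR writes, and a read from the same device cannot complete until the buffered writes have reached it. The idiom as a one-line helper; msk_flush_writes() is hypothetical:

/* Illustrative flush helper: a read from the device orders all
 * previously posted writes ahead of it. */
static inline void
msk_flush_writes(struct msk_softc *sc, int reg)
{
	(void)CSR_READ_4(sc, reg);	/* result discarded on purpose */
}

/*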
*/ CSR_READ_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG)); } static void msk_stop(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct msk_txdesc *txd; struct msk_rxdesc *rxd; struct msk_rxdesc *jrxd; struct ifnet *ifp; uint32_t val; int i; MSK_IF_LOCK_ASSERT(sc_if); sc = sc_if->msk_softc; ifp = sc_if->msk_ifp; callout_stop(&sc_if->msk_tick_ch); sc_if->msk_watchdog_timer = 0; /* Disable interrupts. */ if (sc_if->msk_port == MSK_PORT_A) { sc->msk_intrmask &= ~Y2_IS_PORT_A; sc->msk_intrhwemask &= ~Y2_HWE_L1_MASK; } else { sc->msk_intrmask &= ~Y2_IS_PORT_B; sc->msk_intrhwemask &= ~Y2_HWE_L2_MASK; } CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); /* Disable Tx/Rx MAC. */ val = GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); val &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, val); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); /* Update stats and clear counters. */ msk_stats_update(sc_if); /* Stop Tx BMU. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP); val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR)); for (i = 0; i < MSK_TIMEOUT; i++) { if ((val & (BMU_STOP | BMU_IDLE)) == 0) { CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP); val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR)); } else break; DELAY(1); } if (i == MSK_TIMEOUT) device_printf(sc_if->msk_if_dev, "Tx BMU stop failed\n"); CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_SET | RB_DIS_OP_MD); /* Disable all GMAC interrupt. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_MSK), 0); /* Disable PHY interrupt. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, 0); /* Disable the RAM Interface Arbiter. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_DIS_ARB); /* Reset the PCI FIFO of the async Tx queue */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_RST_SET | BMU_FIFO_RST); /* Reset the Tx prefetch units. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(sc_if->msk_txq, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); /* Reset the RAM Buffer async Tx queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_SET); /* Reset Tx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_SET); /* Set Pause Off. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_PAUSE_OFF); /* * The Rx Stop command will not work for Yukon-2 if the BMU does not * reach the end of packet and since we can't make sure that we have * incoming data, we must reset the BMU while it is not during a DMA * transfer. Since it is possible that the Rx path is still active, * the Rx RAM buffer will be stopped first, so any possible incoming * data will not trigger a DMA. After the RAM buffer is stopped, the * BMU is polled until any DMA in progress is ended and only then it * will be reset. */ /* Disable the RAM Buffer receive queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_DIS_OP_MD); for (i = 0; i < MSK_TIMEOUT; i++) { if (CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, Q_RSL)) == CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, Q_RL))) break; DELAY(1); } if (i == MSK_TIMEOUT) device_printf(sc_if->msk_if_dev, "Rx BMU stop failed\n"); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_RST_SET | BMU_FIFO_RST); /* Reset the Rx prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); /* Reset the RAM Buffer receive queue. 
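*/

The BMU stop code above is a bounded poll: re-issue the stop, re-read the CSR, and give up after MSK_TIMEOUT one-microsecond delays. The same shape factored into a helper; msk_wait_bmu_idle() is a sketch, not driver API, and assumes the driver's usual errno definitions are in scope:

static int
msk_wait_bmu_idle(struct msk_softc *sc, struct msk_if_softc *sc_if)
{
	uint32_t val;
	int i;

	for (i = 0; i < MSK_TIMEOUT; i++) {
		val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR));
		if ((val & (BMU_STOP | BMU_IDLE)) != 0)
			return (0);	/* stopped or idle */
		/* Not yet: re-issue the stop and poll again. */
		CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP);
		DELAY(1);		/* 1 microsecond between polls */
	}
	return (ETIMEDOUT);
}

/*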
*/ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_RST_SET); /* Reset Rx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_SET); /* Free Rx and Tx mbufs still in the queues. */ for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; if (jrxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap); m_freem(jrxd->rx_m); jrxd->rx_m = NULL; } } for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } /* * Mark the interface down. */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc_if->msk_flags &= ~MSK_FLAG_LINK; } /* * When GM_PAR_MIB_CLR bit of GM_PHY_ADDR is set, reading lower * counter clears high 16 bits of the counter such that accessing * lower 16 bits should be the last operation. */ #define MSK_READ_MIB32(x, y) \ (((uint32_t)GMAC_READ_2(sc, x, (y) + 4)) << 16) + \ (uint32_t)GMAC_READ_2(sc, x, y) #define MSK_READ_MIB64(x, y) \ (((uint64_t)MSK_READ_MIB32(x, (y) + 8)) << 32) + \ (uint64_t)MSK_READ_MIB32(x, y) static void msk_stats_clear(struct msk_if_softc *sc_if) { struct msk_softc *sc; uint32_t reg; uint16_t gmac; int i; MSK_IF_LOCK_ASSERT(sc_if); sc = sc_if->msk_softc; /* Set MIB Clear Counter Mode. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_PHY_ADDR); GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac | GM_PAR_MIB_CLR); /* Read all MIB Counters with Clear Mode set. */ for (i = GM_RXF_UC_OK; i <= GM_TXE_FIFO_UR; i += sizeof(uint32_t)) reg = MSK_READ_MIB32(sc_if->msk_port, i); /* Clear MIB Clear Counter Mode. */ gmac &= ~GM_PAR_MIB_CLR; GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac); } static void msk_stats_update(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; struct msk_hw_stats *stats; uint16_t gmac; uint32_t reg; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc = sc_if->msk_softc; stats = &sc_if->msk_stats; /* Set MIB Clear Counter Mode. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_PHY_ADDR); GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac | GM_PAR_MIB_CLR); /* Rx stats. 
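*/

MSK_READ_MIB32 above reads the high half at offset y + 4 before the low half at y because, with GM_PAR_MIB_CLR set, touching the low 16 bits is what latches and clears the counter, so it must come last. A standalone model of that ordering; regs[] stands in for GMAC register space and every value is illustrative:

#include <stdint.h>
#include <stdio.h>

static uint16_t regs[2] = { 0xbeef, 0x0012 };	/* low at [0], high at [1] */

/* Compose a 32-bit counter from two 16-bit halves; in clear-on-read
 * mode the hardware clears on the low-half access, so read high first. */
static uint32_t
read_mib32(void)
{
	uint32_t hi = regs[1];	/* high 16 bits first */
	uint32_t lo = regs[0];	/* low 16 bits last (clears in HW) */

	return (hi << 16) + lo;
}

int
main(void)
{
	printf("counter = %#x\n", (unsigned)read_mib32());	/* 0x12beef */
	return (0);
}

/*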
*/ stats->rx_ucast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_UC_OK); stats->rx_bcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_BC_OK); stats->rx_pause_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MPAUSE); stats->rx_mcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MC_OK); stats->rx_crc_errs += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_FCS_ERR); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE1); stats->rx_good_octets += MSK_READ_MIB64(sc_if->msk_port, GM_RXO_OK_LO); stats->rx_bad_octets += MSK_READ_MIB64(sc_if->msk_port, GM_RXO_ERR_LO); stats->rx_runts += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SHT); stats->rx_runt_errs += MSK_READ_MIB32(sc_if->msk_port, GM_RXE_FRAG); stats->rx_pkts_64 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_64B); stats->rx_pkts_65_127 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_127B); stats->rx_pkts_128_255 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_255B); stats->rx_pkts_256_511 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_511B); stats->rx_pkts_512_1023 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_1023B); stats->rx_pkts_1024_1518 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_1518B); stats->rx_pkts_1519_max += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MAX_SZ); stats->rx_pkts_too_long += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_LNG_ERR); stats->rx_pkts_jabbers += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_JAB_PKT); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE2); stats->rx_fifo_oflows += MSK_READ_MIB32(sc_if->msk_port, GM_RXE_FIFO_OV); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE3); /* Tx stats. */ stats->tx_ucast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_UC_OK); stats->tx_bcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_BC_OK); stats->tx_pause_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MPAUSE); stats->tx_mcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MC_OK); stats->tx_octets += MSK_READ_MIB64(sc_if->msk_port, GM_TXO_OK_LO); stats->tx_pkts_64 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_64B); stats->tx_pkts_65_127 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_127B); stats->tx_pkts_128_255 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_255B); stats->tx_pkts_256_511 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_511B); stats->tx_pkts_512_1023 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_1023B); stats->tx_pkts_1024_1518 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_1518B); stats->tx_pkts_1519_max += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MAX_SZ); reg = MSK_READ_MIB32(sc_if->msk_port, GM_TXF_SPARE1); stats->tx_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_COL); stats->tx_late_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_LAT_COL); stats->tx_excess_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_ABO_COL); stats->tx_multi_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MUL_COL); stats->tx_single_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_SNG_COL); stats->tx_underflows += MSK_READ_MIB32(sc_if->msk_port, GM_TXE_FIFO_UR); /* Clear MIB Clear Counter Mode. 
*/ gmac &= ~GM_PAR_MIB_CLR; GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac); } static int msk_sysctl_stat32(SYSCTL_HANDLER_ARGS) { struct msk_softc *sc; struct msk_if_softc *sc_if; uint32_t result, *stat; int off; sc_if = (struct msk_if_softc *)arg1; sc = sc_if->msk_softc; off = arg2; stat = (uint32_t *)((uint8_t *)&sc_if->msk_stats + off); MSK_IF_LOCK(sc_if); result = MSK_READ_MIB32(sc_if->msk_port, GM_MIB_CNT_BASE + off * 2); result += *stat; MSK_IF_UNLOCK(sc_if); return (sysctl_handle_int(oidp, &result, 0, req)); } static int msk_sysctl_stat64(SYSCTL_HANDLER_ARGS) { struct msk_softc *sc; struct msk_if_softc *sc_if; uint64_t result, *stat; int off; sc_if = (struct msk_if_softc *)arg1; sc = sc_if->msk_softc; off = arg2; stat = (uint64_t *)((uint8_t *)&sc_if->msk_stats + off); MSK_IF_LOCK(sc_if); result = MSK_READ_MIB64(sc_if->msk_port, GM_MIB_CNT_BASE + off * 2); result += *stat; MSK_IF_UNLOCK(sc_if); return (sysctl_handle_64(oidp, &result, 0, req)); } #undef MSK_READ_MIB32 #undef MSK_READ_MIB64 #define MSK_SYSCTL_STAT32(sc, c, o, p, n, d) \ SYSCTL_ADD_PROC(c, p, OID_AUTO, o, CTLTYPE_UINT | CTLFLAG_RD, \ sc, offsetof(struct msk_hw_stats, n), msk_sysctl_stat32, \ "IU", d) #define MSK_SYSCTL_STAT64(sc, c, o, p, n, d) \ SYSCTL_ADD_PROC(c, p, OID_AUTO, o, CTLTYPE_U64 | CTLFLAG_RD, \ sc, offsetof(struct msk_hw_stats, n), msk_sysctl_stat64, \ "QU", d) static void msk_sysctl_node(struct msk_if_softc *sc_if) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child, *schild; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(sc_if->msk_if_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc_if->msk_if_dev)); tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "MSK Statistics"); schild = SYSCTL_CHILDREN(tree); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "rx", CTLFLAG_RD, NULL, "MSK RX Statistics"); child = SYSCTL_CHILDREN(tree); MSK_SYSCTL_STAT32(sc_if, ctx, "ucast_frames", child, rx_ucast_frames, "Good unicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "bcast_frames", child, rx_bcast_frames, "Good broadcast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "pause_frames", child, rx_pause_frames, "Pause frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "mcast_frames", child, rx_mcast_frames, "Multicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "crc_errs", child, rx_crc_errs, "CRC errors"); MSK_SYSCTL_STAT64(sc_if, ctx, "good_octets", child, rx_good_octets, "Good octets"); MSK_SYSCTL_STAT64(sc_if, ctx, "bad_octets", child, rx_bad_octets, "Bad octets"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_64", child, rx_pkts_64, "64 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_65_127", child, rx_pkts_65_127, "65 to 127 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_128_255", child, rx_pkts_128_255, "128 to 255 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_256_511", child, rx_pkts_256_511, "256 to 511 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_512_1023", child, rx_pkts_512_1023, "512 to 1023 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1024_1518", child, rx_pkts_1024_1518, "1024 to 1518 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1519_max", child, rx_pkts_1519_max, "1519 to max frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_too_long", child, rx_pkts_too_long, "frames too long"); MSK_SYSCTL_STAT32(sc_if, ctx, "jabbers", child, rx_pkts_jabbers, "Jabber errors"); MSK_SYSCTL_STAT32(sc_if, ctx, "overflows", child, rx_fifo_oflows, "FIFO overflows"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "tx", CTLFLAG_RD, NULL, "MSK TX Statistics"); child = 
SYSCTL_CHILDREN(tree); MSK_SYSCTL_STAT32(sc_if, ctx, "ucast_frames", child, tx_ucast_frames, "Unicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "bcast_frames", child, tx_bcast_frames, "Broadcast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "pause_frames", child, tx_pause_frames, "Pause frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "mcast_frames", child, tx_mcast_frames, "Multicast frames"); MSK_SYSCTL_STAT64(sc_if, ctx, "octets", child, tx_octets, "Octets"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_64", child, tx_pkts_64, "64 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_65_127", child, tx_pkts_65_127, "65 to 127 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_128_255", child, tx_pkts_128_255, "128 to 255 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_256_511", child, tx_pkts_256_511, "256 to 511 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_512_1023", child, tx_pkts_512_1023, "512 to 1023 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1024_1518", child, tx_pkts_1024_1518, "1024 to 1518 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1519_max", child, tx_pkts_1519_max, "1519 to max frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "colls", child, tx_colls, "Collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "late_colls", child, tx_late_colls, "Late collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "excess_colls", child, tx_excess_colls, "Excessive collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "multi_colls", child, tx_multi_colls, "Multiple collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "single_colls", child, tx_single_colls, "Single collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "underflows", child, tx_underflows, "FIFO underflows"); } #undef MSK_SYSCTL_STAT32 #undef MSK_SYSCTL_STAT64 static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (!arg1) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || !req->newptr) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_msk_proc_limit(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, MSK_PROC_MIN, MSK_PROC_MAX)); } Index: projects/clang1000-import/sys/dev/netmap/netmap_mem2.c =================================================================== --- projects/clang1000-import/sys/dev/netmap/netmap_mem2.c (revision 357178) +++ projects/clang1000-import/sys/dev/netmap/netmap_mem2.c (revision 357179) @@ -1,2857 +1,2858 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2014 Matteo Landi * Copyright (C) 2012-2016 Luigi Rizzo * Copyright (C) 2012-2016 Giuseppe Lettieri * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef linux #include "bsd_glue.h" #endif /* linux */ #ifdef __APPLE__ #include "osx_glue.h" #endif /* __APPLE__ */ #ifdef __FreeBSD__ #include /* prerequisite */ __FBSDID("$FreeBSD$"); #include #include #include /* MALLOC_DEFINE */ #include #include /* vtophys */ #include /* vtophys */ #include /* sockaddrs */ #include #include #include #include #include #include /* bus_dmamap_* */ /* M_NETMAP only used in here */ MALLOC_DECLARE(M_NETMAP); MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map"); #endif /* __FreeBSD__ */ #ifdef _WIN32 #include #endif #include #include #include #include "netmap_mem2.h" #ifdef _WIN32_USE_SMALL_GENERIC_DEVICES_MEMORY #define NETMAP_BUF_MAX_NUM 8*4096 /* if too big takes too much time to allocate */ #else #define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */ #endif #define NETMAP_POOL_MAX_NAMSZ 32 enum { NETMAP_IF_POOL = 0, NETMAP_RING_POOL, NETMAP_BUF_POOL, NETMAP_POOLS_NR }; struct netmap_obj_params { u_int size; u_int num; u_int last_size; u_int last_num; }; struct netmap_obj_pool { char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */ /* ---------------------------------------------------*/ /* these are only meaningful if the pool is finalized */ /* (see 'finalized' field in netmap_mem_d) */ size_t memtotal; /* actual total memory space */ struct lut_entry *lut; /* virt,phys addresses, objtotal entries */ uint32_t *bitmap; /* one bit per buffer, 1 means free */ uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */ uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ u_int objtotal; /* actual total number of objects. */ u_int numclusters; /* actual number of clusters */ u_int objfree; /* number of free objects. 
*/ int alloc_done; /* we have allocated the memory */ /* ---------------------------------------------------*/ /* limits */ u_int objminsize; /* minimum object size */ u_int objmaxsize; /* maximum object size */ u_int nummin; /* minimum number of objects */ u_int nummax; /* maximum number of objects */ /* these are changed only by config */ u_int _objtotal; /* total number of objects */ u_int _objsize; /* object size */ u_int _clustsize; /* cluster size */ u_int _clustentries; /* objects per cluster */ u_int _numclusters; /* number of clusters */ /* requested values */ u_int r_objtotal; u_int r_objsize; }; #define NMA_LOCK_T NM_MTX_T #define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) #define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) #define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) #define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) #define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) struct netmap_mem_ops { int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*); int (*nmd_get_info)(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t); int (*nmd_config)(struct netmap_mem_d *); int (*nmd_finalize)(struct netmap_mem_d *); void (*nmd_deref)(struct netmap_mem_d *); ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr); void (*nmd_delete)(struct netmap_mem_d *); struct netmap_if * (*nmd_if_new)(struct netmap_adapter *, struct netmap_priv_d *); void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *); int (*nmd_rings_create)(struct netmap_adapter *); void (*nmd_rings_delete)(struct netmap_adapter *); }; struct netmap_mem_d { NMA_LOCK_T nm_mtx; /* protect the allocator */ size_t nm_totalsize; /* shorthand */ u_int flags; #define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */ #define NETMAP_MEM_HIDDEN 0x8 /* being prepared */ int lasterr; /* last error for curr config */ int active; /* active users */ int refcount; /* the three allocators */ struct netmap_obj_pool pools[NETMAP_POOLS_NR]; nm_memid_t nm_id; /* allocator identifier */ int nm_grp; /* iommu group id */ /* list of all existing allocators, sorted by nm_id */ struct netmap_mem_d *prev, *next; struct netmap_mem_ops *ops; struct netmap_obj_params params[NETMAP_POOLS_NR]; #define NM_MEM_NAMESZ 16 char name[NM_MEM_NAMESZ]; }; int netmap_mem_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { int rv; NMA_LOCK(nmd); rv = nmd->ops->nmd_get_lut(nmd, lut); NMA_UNLOCK(nmd); return rv; } int netmap_mem_get_info(struct netmap_mem_d *nmd, uint64_t *size, u_int *memflags, nm_memid_t *memid) { int rv; NMA_LOCK(nmd); rv = nmd->ops->nmd_get_info(nmd, size, memflags, memid); NMA_UNLOCK(nmd); return rv; } vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off) { vm_paddr_t pa; #if defined(__FreeBSD__) /* This function is called by netmap_dev_pager_fault(), which holds a * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we * spin on the trylock. */ NMA_SPINLOCK(nmd); #else NMA_LOCK(nmd); #endif pa = nmd->ops->nmd_ofstophys(nmd, off); NMA_UNLOCK(nmd); return pa; } static int netmap_mem_config(struct netmap_mem_d *nmd) { if (nmd->active) { /* already in use. 
Not fatal, but we * cannot change the configuration */ return 0; } return nmd->ops->nmd_config(nmd); } ssize_t netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *off) { ssize_t rv; NMA_LOCK(nmd); rv = nmd->ops->nmd_if_offset(nmd, off); NMA_UNLOCK(nmd); return rv; } static void netmap_mem_delete(struct netmap_mem_d *nmd) { nmd->ops->nmd_delete(nmd); } struct netmap_if * netmap_mem_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) { struct netmap_if *nifp; struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); nifp = nmd->ops->nmd_if_new(na, priv); NMA_UNLOCK(nmd); return nifp; } void netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nif) { struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); nmd->ops->nmd_if_delete(na, nif); NMA_UNLOCK(nmd); } int netmap_mem_rings_create(struct netmap_adapter *na) { int rv; struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); rv = nmd->ops->nmd_rings_create(na); NMA_UNLOCK(nmd); return rv; } void netmap_mem_rings_delete(struct netmap_adapter *na) { struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); nmd->ops->nmd_rings_delete(na); NMA_UNLOCK(nmd); } static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *); static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *); static int nm_mem_assign_group(struct netmap_mem_d *, struct device *); static void nm_mem_release_id(struct netmap_mem_d *); nm_memid_t netmap_mem_get_id(struct netmap_mem_d *nmd) { return nmd->nm_id; } #ifdef NM_DEBUG_MEM_PUTGET #define NM_DBG_REFC(nmd, func, line) \ nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount); #else #define NM_DBG_REFC(nmd, func, line) #endif /* circular list of all existing allocators */ static struct netmap_mem_d *netmap_last_mem_d = &nm_mem; NM_MTX_T nm_mem_list_lock; struct netmap_mem_d * __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line) { NM_MTX_LOCK(nm_mem_list_lock); nmd->refcount++; NM_DBG_REFC(nmd, func, line); NM_MTX_UNLOCK(nm_mem_list_lock); return nmd; } void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line) { int last; NM_MTX_LOCK(nm_mem_list_lock); last = (--nmd->refcount == 0); if (last) nm_mem_release_id(nmd); NM_DBG_REFC(nmd, func, line); NM_MTX_UNLOCK(nm_mem_list_lock); if (last) netmap_mem_delete(nmd); } int netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) { int lasterr = 0; if (nm_mem_assign_group(nmd, na->pdev) < 0) { return ENOMEM; } NMA_LOCK(nmd); if (netmap_mem_config(nmd)) goto out; nmd->active++; nmd->lasterr = nmd->ops->nmd_finalize(nmd); if (!nmd->lasterr && na->pdev) { nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); } out: lasterr = nmd->lasterr; NMA_UNLOCK(nmd); if (lasterr) netmap_mem_deref(nmd, na); return lasterr; } static int nm_isset(uint32_t *bitmap, u_int i) { return bitmap[ (i>>5) ] & ( 1U << (i & 31U) ); } static int netmap_init_obj_allocator_bitmap(struct netmap_obj_pool *p) { u_int n, j; if (p->bitmap == NULL) { /* Allocate the bitmap */ n = (p->objtotal + 31) / 32; p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n); if (p->bitmap == NULL) { nm_prerr("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, p->name); return ENOMEM; } p->bitmap_slots = n; } else { memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0])); } p->objfree = 0; /* * Set all the bits in the bitmap that have * corresponding buffers to 1 to indicate they are * free. 
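*/

The loop that follows marks object j free by setting bit j & 31 of 32-bit word j >> 5, the same arithmetic nm_isset() uses above. A standalone illustration of the bit math; bm_set() and bm_isset() are illustrative helpers, not netmap API:

#include <stdint.h>
#include <stdio.h>

/* Object j lives in 32-bit word j >> 5, at bit j & 31. */
static void
bm_set(uint32_t *bm, unsigned j)
{
	bm[j >> 5] |= 1U << (j & 31U);	/* mark free */
}

static int
bm_isset(const uint32_t *bm, unsigned j)
{
	return (bm[j >> 5] & (1U << (j & 31U))) != 0;
}

int
main(void)
{
	uint32_t bm[2] = { 0, 0 };	/* room for 64 objects */

	bm_set(bm, 37);			/* word 1, bit 5 */
	printf("37 free? %d, 36 free? %d\n",
	    bm_isset(bm, 37), bm_isset(bm, 36));	/* 1, 0 */
	return (0);
}

/*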
*/ for (j = 0; j < p->objtotal; j++) { if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) { if (netmap_debug & NM_DEBUG_MEM) nm_prinf("skipping %s %d", p->name, j); continue; } p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) ); p->objfree++; } if (netmap_verbose) nm_prinf("%s free %u", p->name, p->objfree); if (p->objfree == 0) { if (netmap_verbose) nm_prerr("%s: no objects available", p->name); return ENOMEM; } return 0; } static int netmap_mem_init_bitmaps(struct netmap_mem_d *nmd) { int i, error = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nmd->pools[i]; error = netmap_init_obj_allocator_bitmap(p); if (error) return error; } /* * buffers 0 and 1 are reserved */ if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) { nm_prerr("%s: not enough buffers", nmd->pools[NETMAP_BUF_POOL].name); return ENOMEM; } nmd->pools[NETMAP_BUF_POOL].objfree -= 2; if (nmd->pools[NETMAP_BUF_POOL].bitmap) { /* XXX This check is a workaround that prevents a * NULL pointer crash which currently happens only * with ptnetmap guests. * Removed shared-info --> is the bug still there? */ nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3U; } return 0; } int netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) { int last_user = 0; NMA_LOCK(nmd); if (na->active_fds <= 0) netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); if (nmd->active == 1) { last_user = 1; /* * Reset the allocator when it falls out of use so that any * pool resources leaked by unclean application exits are * reclaimed. */ netmap_mem_init_bitmaps(nmd); } nmd->ops->nmd_deref(nmd); nmd->active--; if (last_user) { nmd->nm_grp = -1; nmd->lasterr = 0; } NMA_UNLOCK(nmd); return last_user; } /* accessor functions */ static int netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { lut->lut = nmd->pools[NETMAP_BUF_POOL].lut; #ifdef __FreeBSD__ lut->plut = lut->lut; #endif lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; return 0; } static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = { [NETMAP_IF_POOL] = { .size = 1024, .num = 2, }, [NETMAP_RING_POOL] = { .size = 5*PAGE_SIZE, .num = 4, }, [NETMAP_BUF_POOL] = { .size = 2048, .num = 4098, }, }; /* * nm_mem is the memory allocator used for all physical interfaces * running in netmap mode. * Virtual (VALE) ports will have each its own allocator. */ extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */ struct netmap_mem_d nm_mem = { /* Our memory allocator. */ .pools = { [NETMAP_IF_POOL] = { .name = "netmap_if", .objminsize = sizeof(struct netmap_if), .objmaxsize = 4096, .nummin = 10, /* don't be stingy */ .nummax = 10000, /* XXX very large */ }, [NETMAP_RING_POOL] = { .name = "netmap_ring", .objminsize = sizeof(struct netmap_ring), .objmaxsize = 32*PAGE_SIZE, .nummin = 2, .nummax = 1024, }, [NETMAP_BUF_POOL] = { .name = "netmap_buf", .objminsize = 64, .objmaxsize = 65536, .nummin = 4, .nummax = 1000000, /* one million! 
*/ }, }, .params = { [NETMAP_IF_POOL] = { .size = 1024, .num = 100, }, [NETMAP_RING_POOL] = { .size = 9*PAGE_SIZE, .num = 200, }, [NETMAP_BUF_POOL] = { .size = 2048, .num = NETMAP_BUF_MAX_NUM, }, }, .nm_id = 1, .nm_grp = -1, .prev = &nm_mem, .next = &nm_mem, .ops = &netmap_mem_global_ops, .name = "1" }; /* blueprint for the private memory allocators */ /* XXX clang is not happy about using name as a print format */ static const struct netmap_mem_d nm_blueprint = { .pools = { [NETMAP_IF_POOL] = { .name = "%s_if", .objminsize = sizeof(struct netmap_if), .objmaxsize = 4096, .nummin = 1, .nummax = 100, }, [NETMAP_RING_POOL] = { .name = "%s_ring", .objminsize = sizeof(struct netmap_ring), .objmaxsize = 32*PAGE_SIZE, .nummin = 2, .nummax = 1024, }, [NETMAP_BUF_POOL] = { .name = "%s_buf", .objminsize = 64, .objmaxsize = 65536, .nummin = 4, .nummax = 1000000, /* one million! */ }, }, .nm_grp = -1, .flags = NETMAP_MEM_PRIVATE, .ops = &netmap_mem_global_ops, }; /* memory allocator related sysctls */ #define STRINGIFY(x) #x #define DECLARE_SYSCTLS(id, name) \ SYSBEGIN(mem2_ ## name); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \ CTLFLAG_RW, &nm_mem.params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \ CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \ CTLFLAG_RW, &nm_mem.params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \ CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \ CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \ "Default size of private netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \ CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \ "Default number of private netmap " STRINGIFY(name) "s"); \ SYSEND SYSCTL_DECL(_dev_netmap); DECLARE_SYSCTLS(NETMAP_IF_POOL, if); DECLARE_SYSCTLS(NETMAP_RING_POOL, ring); DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf); /* call with nm_mem_list_lock held */ static int nm_mem_assign_id_locked(struct netmap_mem_d *nmd) { nm_memid_t id; struct netmap_mem_d *scan = netmap_last_mem_d; int error = ENOMEM; do { /* we rely on unsigned wrap around */ id = scan->nm_id + 1; if (id == 0) /* reserve 0 as error value */ id = 1; scan = scan->next; if (id != scan->nm_id) { nmd->nm_id = id; nmd->prev = scan->prev; nmd->next = scan; scan->prev->next = nmd; scan->prev = nmd; netmap_last_mem_d = nmd; nmd->refcount = 1; NM_DBG_REFC(nmd, __FUNCTION__, __LINE__); error = 0; break; } } while (scan != netmap_last_mem_d); return error; } /* call with nm_mem_list_lock *not* held */ static int nm_mem_assign_id(struct netmap_mem_d *nmd) { int ret; NM_MTX_LOCK(nm_mem_list_lock); ret = nm_mem_assign_id_locked(nmd); NM_MTX_UNLOCK(nm_mem_list_lock); return ret; } /* call with nm_mem_list_lock held */ static void nm_mem_release_id(struct netmap_mem_d *nmd) { nmd->prev->next = nmd->next; nmd->next->prev = nmd->prev; if (netmap_last_mem_d == nmd) netmap_last_mem_d = nmd->prev; nmd->prev = nmd->next = NULL; } struct netmap_mem_d * netmap_mem_find(nm_memid_t id) { struct netmap_mem_d *nmd; NM_MTX_LOCK(nm_mem_list_lock); nmd = netmap_last_mem_d; do { if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_id == id) { nmd->refcount++; NM_DBG_REFC(nmd, __FUNCTION__, __LINE__); NM_MTX_UNLOCK(nm_mem_list_lock); return nmd; } nmd = nmd->next; } 
while (nmd != netmap_last_mem_d); NM_MTX_UNLOCK(nm_mem_list_lock); return NULL; } static int nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev) { int err = 0, id; id = nm_iommu_group_id(dev); if (netmap_debug & NM_DEBUG_MEM) nm_prinf("iommu_group %d", id); NMA_LOCK(nmd); if (nmd->nm_grp < 0) nmd->nm_grp = id; if (nmd->nm_grp != id) { if (netmap_verbose) nm_prerr("iommu group mismatch: %u vs %u", nmd->nm_grp, id); nmd->lasterr = err = ENOMEM; } NMA_UNLOCK(nmd); return err; } static struct lut_entry * nm_alloc_lut(u_int nobj) { size_t n = sizeof(struct lut_entry) * nobj; struct lut_entry *lut; #ifdef linux lut = vmalloc(n); #else lut = nm_os_malloc(n); #endif return lut; } static void nm_free_lut(struct lut_entry *lut, u_int objtotal) { bzero(lut, sizeof(struct lut_entry) * objtotal); #ifdef linux vfree(lut); #else nm_os_free(lut); #endif } #if defined(linux) || defined(_WIN32) static struct plut_entry * nm_alloc_plut(u_int nobj) { size_t n = sizeof(struct plut_entry) * nobj; struct plut_entry *lut; lut = vmalloc(n); return lut; } static void nm_free_plut(struct plut_entry * lut) { vfree(lut); } #endif /* linux or _WIN32 */ /* * First, find the allocator that contains the requested offset, * then locate the cluster through a lookup table. */ static vm_paddr_t netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) { int i; vm_ooffset_t o = offset; vm_paddr_t pa; struct netmap_obj_pool *p; p = nmd->pools; for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) { if (offset >= p[i].memtotal) continue; // now lookup the cluster's address #ifndef _WIN32 pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) + offset % p[i]._objsize; #else pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr); pa.QuadPart += offset % p[i]._objsize; #endif return pa; } /* this is only in case of errors */ nm_prerr("invalid ofs 0x%x out of 0x%zx 0x%zx 0x%zx", (u_int)o, p[NETMAP_IF_POOL].memtotal, p[NETMAP_IF_POOL].memtotal + p[NETMAP_RING_POOL].memtotal, p[NETMAP_IF_POOL].memtotal + p[NETMAP_RING_POOL].memtotal + p[NETMAP_BUF_POOL].memtotal); #ifndef _WIN32 return 0; /* bad address */ #else vm_paddr_t res; res.QuadPart = 0; return res; #endif } #ifdef _WIN32 /* * win32_build_virtual_memory_for_userspace * * This function gets all the objects that make up the pools and maps * a contiguous virtual memory space for userspace. * It works this way: * 1 - allocate a Memory Descriptor List as wide as the sum * of the memory needed for the pools * 2 - cycle through all the objects in every pool and for every object do * * 2a - get the list of the physical address descriptors * 2b - calculate the offset in the array of page descriptors in the * main MDL * 2c - copy the descriptors of the object in the main MDL * * 3 - return the resulting MDL that needs to be mapped in userland * * In this way we will have an MDL that describes all the memory for the * objects in a single MDL */ PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd) { u_int memflags, ofs = 0; PMDL mainMdl, tempMdl; uint64_t memsize; int i, j; if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) { nm_prerr("memory not finalised yet"); return NULL; } mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL); if (mainMdl == NULL) { nm_prerr("failed to allocate mdl"); return NULL; } NMA_LOCK(nmd); for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nmd->pools[i]; int clsz = p->_clustsize; int clobjs = p->_clustentries; /* objects per cluster */ int mdl_len = sizeof(PFN_NUMBER) 
* BYTES_TO_PAGES(clsz); PPFN_NUMBER pSrc, pDst; /* each pool has a different cluster size so we need to reallocate */ tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL); if (tempMdl == NULL) { NMA_UNLOCK(nmd); nm_prerr("fail to allocate tempMdl"); IoFreeMdl(mainMdl); return NULL; } pSrc = MmGetMdlPfnArray(tempMdl); /* create one entry per cluster, the lut[] has one entry per object */ for (j = 0; j < p->numclusters; j++, ofs += clsz) { pDst = &MmGetMdlPfnArray(mainMdl)[BYTES_TO_PAGES(ofs)]; MmInitializeMdl(tempMdl, p->lut[j*clobjs].vaddr, clsz); MmBuildMdlForNonPagedPool(tempMdl); /* compute physical page addresses */ RtlCopyMemory(pDst, pSrc, mdl_len); /* copy the page descriptors */ mainMdl->MdlFlags = tempMdl->MdlFlags; /* XXX what is in here ? */ } IoFreeMdl(tempMdl); } NMA_UNLOCK(nmd); return mainMdl; } #endif /* _WIN32 */ /* * helper function for OS-specific mmap routines (currently only windows). * Given an nmd and a pool index, returns the cluster size and number of clusters. * Returns 0 if memory is finalised and the pool is valid, otherwise 1. * It should be called under NMA_LOCK(nmd) otherwise the underlying info can change. */ int netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize, u_int *numclusters) { if (!nmd || !clustsize || !numclusters || pool >= NETMAP_POOLS_NR) return 1; /* invalid arguments */ // NMA_LOCK_ASSERT(nmd); if (!(nmd->flags & NETMAP_MEM_FINALIZED)) { *clustsize = *numclusters = 0; return 1; /* not ready yet */ } *clustsize = nmd->pools[pool]._clustsize; *numclusters = nmd->pools[pool].numclusters; return 0; /* success */ } static int netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, nm_memid_t *id) { int error = 0; error = netmap_mem_config(nmd); if (error) goto out; if (size) { if (nmd->flags & NETMAP_MEM_FINALIZED) { *size = nmd->nm_totalsize; } else { int i; *size = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = nmd->pools + i; *size += ((size_t)p->_numclusters * (size_t)p->_clustsize); } } } if (memflags) *memflags = nmd->flags; if (id) *id = nmd->nm_id; out: return error; } /* * we store objects by kernel address, need to find the offset * within the pool to export the value to userspace. * Algorithm: scan until we find the cluster, then add the * actual offset in the cluster */ static ssize_t netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) { int i, k = p->_clustentries, n = p->objtotal; ssize_t ofs = 0; for (i = 0; i < n; i += k, ofs += p->_clustsize) { const char *base = p->lut[i].vaddr; ssize_t relofs = (const char *) vaddr - base; if (relofs < 0 || relofs >= p->_clustsize) continue; ofs = ofs + relofs; nm_prdis("%s: return offset %d (cluster %d) for pointer %p", p->name, ofs, i, vaddr); return ofs; } nm_prerr("address %p is not contained inside any cluster (%s)", vaddr, p->name); return 0; /* An error occurred */ } /* Helper functions which convert virtual addresses to offsets */ #define netmap_if_offset(n, v) \ netmap_obj_offset(&(n)->pools[NETMAP_IF_POOL], (v)) #define netmap_ring_offset(n, v) \ ((n)->pools[NETMAP_IF_POOL].memtotal + \ netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v))) static ssize_t netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr) { return netmap_if_offset(nmd, addr); } /* * report the index, and use start position as a hint, * otherwise buffer allocation becomes terribly expensive. 
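*/

netmap_obj_malloc() below implements that hint: resume the first-fit scan at *start, skip exhausted 32-bit words, claim the lowest set bit, and report the final word index back through *start so the next allocation does not rescan from zero. A reduced userspace model of the scan; all names are illustrative:

#include <stdint.h>
#include <stddef.h>

/* Hinted first-fit scan over a free bitmap (1 = free). Returns the
 * claimed object index, or -1 if the pool is exhausted. */
static int
bitmap_alloc_hinted(uint32_t *bitmap, uint32_t nwords, uint32_t *start)
{
	uint32_t i, j, mask, cur;

	for (i = (start != NULL) ? *start : 0; i < nwords; i++) {
		cur = bitmap[i];
		if (cur == 0)		/* word fully allocated */
			continue;
		/* Find the lowest set bit in this word. */
		for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1)
			;
		bitmap[i] &= ~mask;	/* mark object as in use */
		if (start != NULL)
			*start = i;	/* remember where we got to */
		return (int)(i * 32 + j);
	}
	return -1;			/* pool exhausted */
}

/*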
*/ static void * netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index) { uint32_t i = 0; /* index in the bitmap */ uint32_t mask, j = 0; /* slot counter */ void *vaddr = NULL; if (len > p->_objsize) { nm_prerr("%s request size %d too large", p->name, len); return NULL; } if (p->objfree == 0) { nm_prerr("no more %s objects", p->name); return NULL; } if (start) i = *start; /* termination is guaranteed by p->free, but better check bounds on i */ while (vaddr == NULL && i < p->bitmap_slots) { uint32_t cur = p->bitmap[i]; if (cur == 0) { /* bitmask is fully used */ i++; continue; } /* locate a slot */ for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1) ; p->bitmap[i] &= ~mask; /* mark object as in use */ p->objfree--; vaddr = p->lut[i * 32 + j].vaddr; if (index) *index = i * 32 + j; } nm_prdis("%s allocator: allocated object @ [%d][%d]: vaddr %p",p->name, i, j, vaddr); if (start) *start = i; return vaddr; } /* * free by index, not by address. * XXX should we also cleanup the content ? */ static int netmap_obj_free(struct netmap_obj_pool *p, uint32_t j) { uint32_t *ptr, mask; if (j >= p->objtotal) { nm_prerr("invalid index %u, max %u", j, p->objtotal); return 1; } ptr = &p->bitmap[j / 32]; mask = (1 << (j % 32)); if (*ptr & mask) { nm_prerr("ouch, double free on buffer %d", j); return 1; } else { *ptr |= mask; p->objfree++; return 0; } } /* * free by address. This is slow but is only used for a few * objects (rings, nifp) */ static void netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) { u_int i, j, n = p->numclusters; for (i = 0, j = 0; i < n; i++, j += p->_clustentries) { void *base = p->lut[i * p->_clustentries].vaddr; ssize_t relofs = (ssize_t) vaddr - (ssize_t) base; /* Given address, is out of the scope of the current cluster.*/ if (base == NULL || vaddr < base || relofs >= p->_clustsize) continue; j = j + relofs / p->_objsize; /* KASSERT(j != 0, ("Cannot free object 0")); */ netmap_obj_free(p, j); return; } nm_prerr("address %p is not contained inside any cluster (%s)", vaddr, p->name); } unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd) { return nmd->pools[NETMAP_BUF_POOL]._objsize; } #define netmap_if_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL) #define netmap_if_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v)) #define netmap_ring_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL) #define netmap_ring_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v)) #define netmap_buf_malloc(n, _pos, _index) \ netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index) #if 0 /* currently unused */ /* Return the index associated to the given packet buffer */ #define netmap_buf_index(n, v) \ (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n)) #endif /* * allocate extra buffers in a linked list. * returns the actual number. 
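 *
 * The "list" is threaded through the buffers themselves: the first 32-bit
 * word of each extra buffer stores the index of the next one, and index 0
 * terminates the chain. A standalone sketch of the same linking scheme
 * (the demo_* names are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stdio.h>

#define DEMO_NBUFS	8
static uint32_t demo_buf[DEMO_NBUFS];	/* word 0 of each buffer */

static void
demo_push(uint32_t *head, uint32_t idx)
{
	demo_buf[idx] = *head;	/* link to the previous head */
	*head = idx;		/* the new buffer becomes the head */
}

int
main(void)
{
	uint32_t head = 0;	/* 0 is the 'null' index, i.e. empty list */
	uint32_t cur;

	demo_push(&head, 3);
	demo_push(&head, 5);
	/* walk the chain the way netmap_extra_free() does */
	for (cur = head; cur != 0; cur = demo_buf[cur])
		printf("buffer %u\n", cur);	/* prints 5, then 3 */
	return (0);
}
#endif
/*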
*/ uint32_t netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n) { struct netmap_mem_d *nmd = na->nm_mem; uint32_t i, pos = 0; /* opaque, scan position in the bitmap */ NMA_LOCK(nmd); *head = 0; /* default, 'null' index, i.e. empty list */ for (i = 0; i < n; i++) { uint32_t cur = *head; /* save current head */ uint32_t *p = netmap_buf_malloc(nmd, &pos, head); if (p == NULL) { nm_prerr("no more buffers after %d of %d", i, n); *head = cur; /* restore */ break; } nm_prdis(5, "allocate buffer %d -> %d", *head, cur); *p = cur; /* link to previous head */ } NMA_UNLOCK(nmd); return i; } static void netmap_extra_free(struct netmap_adapter *na, uint32_t head) { struct lut_entry *lut = na->na_lut.lut; struct netmap_mem_d *nmd = na->nm_mem; struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; uint32_t i, cur, *buf; nm_prdis("freeing the extra list"); for (i = 0; head >= 2 && head < p->objtotal; i++) { cur = head; buf = lut[head].vaddr; head = *buf; *buf = 0; if (netmap_obj_free(p, cur)) break; } if (head != 0) nm_prerr("breaking with head %d", head); if (netmap_debug & NM_DEBUG_MEM) nm_prinf("freed %d buffers", i); } /* Return nonzero on error */ static int netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) { struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; u_int i = 0; /* slot counter */ uint32_t pos = 0; /* slot in p->bitmap */ uint32_t index = 0; /* buffer index */ for (i = 0; i < n; i++) { void *vaddr = netmap_buf_malloc(nmd, &pos, &index); if (vaddr == NULL) { nm_prerr("no more buffers after %d of %d", i, n); goto cleanup; } slot[i].buf_idx = index; slot[i].len = p->_objsize; slot[i].flags = 0; slot[i].ptr = 0; } nm_prdis("%s: allocated %d buffers, %d available, first at %d", p->name, n, p->objfree, pos); return (0); cleanup: while (i > 0) { i--; netmap_obj_free(p, slot[i].buf_idx); } bzero(slot, n * sizeof(slot[0])); return (ENOMEM); } static void netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index) { struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; u_int i; for (i = 0; i < n; i++) { slot[i].buf_idx = index; slot[i].len = p->_objsize; slot[i].flags = 0; } } static void netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i) { struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; if (i < 2 || i >= p->objtotal) { nm_prerr("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal); return; } netmap_obj_free(p, i); } static void netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) { u_int i; for (i = 0; i < n; i++) { if (slot[i].buf_idx > 1) netmap_free_buf(nmd, slot[i].buf_idx); } nm_prdis("%s: released some buffers, available: %u", p->name, p->objfree); } static void netmap_reset_obj_allocator(struct netmap_obj_pool *p) { if (p == NULL) return; if (p->bitmap) nm_os_free(p->bitmap); p->bitmap = NULL; if (p->invalid_bitmap) nm_os_free(p->invalid_bitmap); p->invalid_bitmap = NULL; if (!p->alloc_done) { /* allocation was done by somebody else. * Let them clean up after themselves. */ return; } if (p->lut) { u_int i; /* * Free each cluster allocated in * netmap_finalize_obj_allocator(). The cluster start * addresses are stored at multiples of p->_clustentries * in the lut.
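 *
 * For example, with the (hypothetical) geometry objsize = 2048 and
 * _clustentries = 2, object 7 lives in the cluster whose start address
 * is stored at lut[6], at byte offset 2048 within that cluster:
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdio.h>

#define DEMO_CLUSTENTRIES	2	/* objects per cluster (hypothetical) */
#define DEMO_OBJSIZE		2048	/* 2 * 2048 fills a 4 KiB cluster */

int
main(void)
{
	unsigned obj = 7;
	unsigned base = obj - obj % DEMO_CLUSTENTRIES; /* lut index of cluster */
	unsigned ofs = (obj % DEMO_CLUSTENTRIES) * DEMO_OBJSIZE;

	printf("object %u: cluster at lut[%u], offset %u\n", obj, base, ofs);
	return (0);
}
#endif
/*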
*/ for (i = 0; i < p->objtotal; i += p->_clustentries) { contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP); } nm_free_lut(p->lut, p->objtotal); } p->lut = NULL; p->objtotal = 0; p->memtotal = 0; p->numclusters = 0; p->objfree = 0; p->alloc_done = 0; } /* * Free all resources related to an allocator. */ static void netmap_destroy_obj_allocator(struct netmap_obj_pool *p) { if (p == NULL) return; netmap_reset_obj_allocator(p); } /* * We receive a request for objtotal objects, of size objsize each. * Internally we may round up both numbers, as we allocate objects * in small clusters multiple of the page size. * We need to keep track of objtotal and clustentries, * as they are needed when freeing memory. * * XXX note -- userspace needs the buffers to be contiguous, * so we cannot afford gaps at the end of a cluster. */ /* call with NMA_LOCK held */ static int netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize) { int i; u_int clustsize; /* the cluster size, multiple of page size */ u_int clustentries; /* how many objects per entry */ /* we store the current request, so we can * detect configuration changes later */ p->r_objtotal = objtotal; p->r_objsize = objsize; #define MAX_CLUSTSIZE (1<<22) // 4 MB #define LINE_ROUND NM_CACHE_ALIGN // 64 if (objsize >= MAX_CLUSTSIZE) { /* we could do it but there is no point */ nm_prerr("unsupported allocation for %d bytes", objsize); return EINVAL; } /* make sure objsize is a multiple of LINE_ROUND */ i = (objsize & (LINE_ROUND - 1)); if (i) { nm_prinf("aligning object by %d bytes", LINE_ROUND - i); objsize += LINE_ROUND - i; } if (objsize < p->objminsize || objsize > p->objmaxsize) { nm_prerr("requested objsize %d out of range [%d, %d]", objsize, p->objminsize, p->objmaxsize); return EINVAL; } if (objtotal < p->nummin || objtotal > p->nummax) { nm_prerr("requested objtotal %d out of range [%d, %d]", objtotal, p->nummin, p->nummax); return EINVAL; } /* * Compute number of objects using a brute-force approach: * given a max cluster size, * we try to fill it with objects keeping track of the * wasted space to the next page boundary. */ for (clustentries = 0, i = 1;; i++) { u_int delta, used = i * objsize; if (used > MAX_CLUSTSIZE) break; delta = used % PAGE_SIZE; if (delta == 0) { // exact solution clustentries = i; break; } } /* exact solution not found */ if (clustentries == 0) { nm_prerr("unsupported allocation for %d bytes", objsize); return EINVAL; } /* compute clustsize */ clustsize = clustentries * objsize; if (netmap_debug & NM_DEBUG_MEM) nm_prinf("objsize %d clustsize %d objects %d", objsize, clustsize, clustentries); /* * The number of clusters is n = ceil(objtotal/clustentries) * objtotal' = n * clustentries */ p->_clustentries = clustentries; p->_clustsize = clustsize; p->_numclusters = (objtotal + clustentries - 1) / clustentries; /* actual values (may be larger than requested) */ p->_objsize = objsize; p->_objtotal = p->_numclusters * clustentries; return 0; } /* call with NMA_LOCK held */ static int netmap_finalize_obj_allocator(struct netmap_obj_pool *p) { int i; /* must be signed */ size_t n; if (p->lut) { /* if the lut is already there we assume that also all the * clusters have already been allocated, possibily by somebody * else (e.g., extmem). In the latter case, the alloc_done flag * will remain at zero, so that we will not attempt to * deallocate the clusters by ourselves in * netmap_reset_obj_allocator. 
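 *
 * (A standalone model of the brute-force sizing search used by
 * netmap_config_obj_allocator() above: find the smallest number of
 * objects that ends exactly on a page boundary, capped by the maximum
 * cluster size. The demo_* names and sample sizes are illustrative only.)
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdio.h>

#define DEMO_PAGE_SIZE	4096
#define DEMO_MAX_CLUST	(1 << 22)	/* 4 MB, as MAX_CLUSTSIZE above */

static unsigned
demo_clustentries(unsigned objsize)
{
	unsigned i, used;

	for (i = 1; (used = i * objsize) <= DEMO_MAX_CLUST; i++) {
		if (used % DEMO_PAGE_SIZE == 0)	/* exact solution */
			return (i);
	}
	return (0);	/* no exact fit below the cluster-size cap */
}

int
main(void)
{
	/* 2048-byte objects: 2 per 4 KiB page */
	printf("%u\n", demo_clustentries(2048));	/* 2 */
	/* 1536-byte objects: 8 objects = 12288 bytes = 3 pages */
	printf("%u\n", demo_clustentries(1536));	/* 8 */
	return (0);
}
#endif
/*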
*/ return 0; } /* optimistically assume we have enough memory */ p->numclusters = p->_numclusters; p->objtotal = p->_objtotal; p->alloc_done = 1; p->lut = nm_alloc_lut(p->objtotal); if (p->lut == NULL) { nm_prerr("Unable to create lookup table for '%s'", p->name); goto clean; } /* * Allocate clusters, init pointers */ n = p->_clustsize; for (i = 0; i < (int)p->objtotal;) { int lim = i + p->_clustentries; char *clust; /* * XXX Note, we only need contigmalloc() for buffers attached * to native interfaces. In all other cases (nifp, netmap rings * and even buffers for VALE ports or emulated interfaces) we * can live with standard malloc, because the hardware will not * access the pages directly. */ clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO, (size_t)0, -1UL, PAGE_SIZE, 0); if (clust == NULL) { /* * If we get here, there is a severe memory shortage, * so halve the allocated memory to reclaim some. */ nm_prerr("Unable to create cluster at %d for '%s' allocator", i, p->name); if (i < 2) /* nothing to halve */ goto out; lim = i / 2; for (i--; i >= lim; i--) { if (i % p->_clustentries == 0 && p->lut[i].vaddr) contigfree(p->lut[i].vaddr, n, M_NETMAP); p->lut[i].vaddr = NULL; } out: p->objtotal = i; /* we may have stopped in the middle of a cluster */ p->numclusters = (i + p->_clustentries - 1) / p->_clustentries; break; } /* * Set lut state for all buffers in the current cluster. * * [i, lim) is the set of buffer indexes that cover the * current cluster. * * 'clust' is really the address of the current buffer in * the current cluster as we index through it with a stride * of p->_objsize. */ for (; i < lim; i++, clust += p->_objsize) { p->lut[i].vaddr = clust; #if !defined(linux) && !defined(_WIN32) p->lut[i].paddr = vtophys(clust); #endif } } p->memtotal = (size_t)p->numclusters * (size_t)p->_clustsize; if (netmap_verbose) nm_prinf("Pre-allocated %d clusters (%d/%zuKB) for '%s'", p->numclusters, p->_clustsize >> 10, p->memtotal >> 10, p->name); return 0; clean: netmap_reset_obj_allocator(p); return ENOMEM; } /* call with lock held */ static int netmap_mem_params_changed(struct netmap_obj_params* p) { int i, rv = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { if (p[i].last_size != p[i].size || p[i].last_num != p[i].num) { p[i].last_size = p[i].size; p[i].last_num = p[i].num; rv = 1; } } return rv; } static void netmap_mem_reset_all(struct netmap_mem_d *nmd) { int i; if (netmap_debug & NM_DEBUG_MEM) nm_prinf("resetting %p", nmd); for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_reset_obj_allocator(&nmd->pools[i]); } nmd->flags &= ~NETMAP_MEM_FINALIZED; } static int netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) { int i, lim = p->objtotal; - struct netmap_lut *lut = &na->na_lut; + struct netmap_lut *lut; if (na == NULL || na->pdev == NULL) return 0; + lut = &na->na_lut; #if defined(__FreeBSD__) /* On FreeBSD mapping and unmapping is performed by the txsync * and rxsync routine, packet by packet. 
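 *
 * On Linux, by contrast, netmap_mem_map() below performs one DMA mapping
 * per cluster and then derives the bus address of every object in the
 * cluster by a fixed stride. A standalone model of that fill (the demo_*
 * names and the sample addresses are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stdio.h>

#define DEMO_CLUSTENTRIES	4
#define DEMO_OBJSIZE		1024
#define DEMO_NOBJ		8

int
main(void)
{
	/* pretend bus addresses, one per mapped cluster */
	uint64_t cluster_paddr[2] = { 0x10000, 0x40000 };
	uint64_t plut[DEMO_NOBJ];
	unsigned i, j;

	for (i = 0; i < DEMO_NOBJ; i += DEMO_CLUSTENTRIES) {
		plut[i] = cluster_paddr[i / DEMO_CLUSTENTRIES];
		/* same stride as the inner loop of netmap_mem_map() */
		for (j = 1; j < DEMO_CLUSTENTRIES; j++)
			plut[i + j] = plut[i + j - 1] + DEMO_OBJSIZE;
	}
	for (i = 0; i < DEMO_NOBJ; i++)
		printf("plut[%u] = 0x%llx\n", i, (unsigned long long)plut[i]);
	return (0);
}
#endif
/*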
*/ (void)i; (void)lim; (void)lut; #elif defined(_WIN32) (void)i; (void)lim; (void)lut; nm_prerr("unsupported on Windows"); #else /* linux */ nm_prdis("unmapping and freeing plut for %s", na->name); if (lut->plut == NULL) return 0; for (i = 0; i < lim; i += p->_clustentries) { if (lut->plut[i].paddr) netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->_clustsize); } nm_free_plut(lut->plut); lut->plut = NULL; #endif /* linux */ return 0; } static int netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na) { int error = 0; int i, lim = p->objtotal; struct netmap_lut *lut = &na->na_lut; if (na->pdev == NULL) return 0; #if defined(__FreeBSD__) /* On FreeBSD mapping and unmapping is performed by the txsync * and rxsync routine, packet by packet. */ (void)i; (void)lim; (void)lut; #elif defined(_WIN32) (void)i; (void)lim; (void)lut; nm_prerr("unsupported on Windows"); #else /* linux */ if (lut->plut != NULL) { nm_prdis("plut already allocated for %s", na->name); return 0; } nm_prdis("allocating physical lut for %s", na->name); lut->plut = nm_alloc_plut(lim); if (lut->plut == NULL) { nm_prerr("Failed to allocate physical lut for %s", na->name); return ENOMEM; } for (i = 0; i < lim; i += p->_clustentries) { lut->plut[i].paddr = 0; } for (i = 0; i < lim; i += p->_clustentries) { int j; if (p->lut[i].vaddr == NULL) continue; error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->lut[i].vaddr, p->_clustsize); if (error) { nm_prerr("Failed to map cluster #%d from the %s pool", i, p->name); break; } for (j = 1; j < p->_clustentries; j++) { lut->plut[i + j].paddr = lut->plut[i + j - 1].paddr + p->_objsize; } } if (error) netmap_mem_unmap(p, na); #endif /* linux */ return error; } static int netmap_mem_finalize_all(struct netmap_mem_d *nmd) { int i; if (nmd->flags & NETMAP_MEM_FINALIZED) return 0; nmd->lasterr = 0; nmd->nm_totalsize = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]); if (nmd->lasterr) goto error; nmd->nm_totalsize += nmd->pools[i].memtotal; } nmd->lasterr = netmap_mem_init_bitmaps(nmd); if (nmd->lasterr) goto error; nmd->flags |= NETMAP_MEM_FINALIZED; if (netmap_verbose) nm_prinf("interfaces %zd KB, rings %zd KB, buffers %zd MB", nmd->pools[NETMAP_IF_POOL].memtotal >> 10, nmd->pools[NETMAP_RING_POOL].memtotal >> 10, nmd->pools[NETMAP_BUF_POOL].memtotal >> 20); if (netmap_verbose) nm_prinf("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree); return 0; error: netmap_mem_reset_all(nmd); return nmd->lasterr; } /* * allocator for private memory */ static void * _netmap_mem_private_new(size_t size, struct netmap_obj_params *p, struct netmap_mem_ops *ops, int *perr) { struct netmap_mem_d *d = NULL; int i, err = 0; d = nm_os_malloc(size); if (d == NULL) { err = ENOMEM; goto error; } *d = nm_blueprint; d->ops = ops; err = nm_mem_assign_id(d); if (err) goto error_free; snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id); for (i = 0; i < NETMAP_POOLS_NR; i++) { snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ, nm_blueprint.pools[i].name, d->name); d->params[i].num = p[i].num; d->params[i].size = p[i].size; } NMA_LOCK_INIT(d); err = netmap_mem_config(d); if (err) goto error_rel_id; d->flags &= ~NETMAP_MEM_FINALIZED; return d; error_rel_id: NMA_LOCK_DESTROY(d); nm_mem_release_id(d); error_free: nm_os_free(d); error: if (perr) *perr = err; return NULL; } struct netmap_mem_d * netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr) { struct 
netmap_mem_d *d = NULL; struct netmap_obj_params p[NETMAP_POOLS_NR]; int i; u_int v, maxd; /* account for the fake host rings */ txr++; rxr++; /* copy the min values */ for (i = 0; i < NETMAP_POOLS_NR; i++) { p[i] = netmap_min_priv_params[i]; } /* possibly increase them to fit user request */ v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr); if (p[NETMAP_IF_POOL].size < v) p[NETMAP_IF_POOL].size = v; v = 2 + 4 * npipes; if (p[NETMAP_IF_POOL].num < v) p[NETMAP_IF_POOL].num = v; maxd = (txd > rxd) ? txd : rxd; v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd; if (p[NETMAP_RING_POOL].size < v) p[NETMAP_RING_POOL].size = v; /* each pipe endpoint needs two tx rings (1 normal + 1 host, fake) * and two rx rings (again, 1 normal and 1 fake host) */ v = txr + rxr + 8 * npipes; if (p[NETMAP_RING_POOL].num < v) p[NETMAP_RING_POOL].num = v; /* for each pipe we only need the buffers for the 4 "real" rings. * On the other hand, the pipe ring dimension may be different from * the parent port ring dimension. As a compromise, we allocate twice the * space that would be needed if the pipe rings were the same size as the parent rings */ v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs; /* the +2 is for the tx and rx fake buffers (indices 0 and 1) */ if (p[NETMAP_BUF_POOL].num < v) p[NETMAP_BUF_POOL].num = v; if (netmap_verbose) nm_prinf("req if %d*%d ring %d*%d buf %d*%d", p[NETMAP_IF_POOL].num, p[NETMAP_IF_POOL].size, p[NETMAP_RING_POOL].num, p[NETMAP_RING_POOL].size, p[NETMAP_BUF_POOL].num, p[NETMAP_BUF_POOL].size); d = _netmap_mem_private_new(sizeof(*d), p, &netmap_mem_global_ops, perr); return d; } /* call with lock held */ static int netmap_mem2_config(struct netmap_mem_d *nmd) { int i; if (!netmap_mem_params_changed(nmd->params)) goto out; nm_prdis("reconfiguring"); if (nmd->flags & NETMAP_MEM_FINALIZED) { /* reset previous allocation */ for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_reset_obj_allocator(&nmd->pools[i]); } nmd->flags &= ~NETMAP_MEM_FINALIZED; } for (i = 0; i < NETMAP_POOLS_NR; i++) { nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i], nmd->params[i].num, nmd->params[i].size); if (nmd->lasterr) goto out; } out: return nmd->lasterr; } static int netmap_mem2_finalize(struct netmap_mem_d *nmd) { if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; if (netmap_mem_finalize_all(nmd)) goto out; nmd->lasterr = 0; out: return nmd->lasterr; } static void netmap_mem2_delete(struct netmap_mem_d *nmd) { int i; for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_destroy_obj_allocator(&nmd->pools[i]); } NMA_LOCK_DESTROY(nmd); if (nmd != &nm_mem) nm_os_free(nmd); } #ifdef WITH_EXTMEM /* doubly linked list of all existing external allocators */ static struct netmap_mem_ext *netmap_mem_ext_list = NULL; NM_MTX_T nm_mem_ext_list_lock; #endif /* WITH_EXTMEM */ int netmap_mem_init(void) { NM_MTX_INIT(nm_mem_list_lock); NMA_LOCK_INIT(&nm_mem); netmap_mem_get(&nm_mem); #ifdef WITH_EXTMEM NM_MTX_INIT(nm_mem_ext_list_lock); #endif /* WITH_EXTMEM */ return (0); } void netmap_mem_fini(void) { netmap_mem_put(&nm_mem); } static void netmap_free_rings(struct netmap_adapter *na) { enum txrx t; for_rx_tx(t) { u_int i; for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) { if (netmap_debug & NM_DEBUG_MEM) nm_prinf("NOT deleting ring %s (ring %p, users %d needring %d)", kring->name, ring, kring->users, kring->nr_kflags &
NKR_NEEDRING); continue; } if (netmap_debug & NM_DEBUG_MEM) nm_prinf("deleting ring %s", kring->name); if (!(kring->nr_kflags & NKR_FAKERING)) { nm_prdis("freeing bufs for %s", kring->name); netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); } else { nm_prdis("NOT freeing bufs for %s", kring->name); } netmap_ring_free(na->nm_mem, ring); kring->ring = NULL; } } } /* call with NMA_LOCK held * * Allocate netmap rings and buffers for this card. * The rings are contiguous, but have variable size. * The kring array must follow the layout described * in netmap_krings_create(). */ static int netmap_mem2_rings_create(struct netmap_adapter *na) { enum txrx t; for_rx_tx(t) { u_int i; for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; u_int len, ndesc; if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) { /* unneeded, or already created by somebody else */ if (netmap_debug & NM_DEBUG_MEM) nm_prinf("NOT creating ring %s (ring %p, users %d needring %d)", kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } if (netmap_debug & NM_DEBUG_MEM) nm_prinf("creating %s", kring->name); ndesc = kring->nkr_num_slots; len = sizeof(struct netmap_ring) + ndesc * sizeof(struct netmap_slot); ring = netmap_ring_malloc(na->nm_mem, len); if (ring == NULL) { nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t)); goto cleanup; } nm_prdis("txring at %p", ring); kring->ring = ring; *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; *(int64_t *)(uintptr_t)&ring->buf_ofs = (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - netmap_ring_offset(na->nm_mem, ring); /* copy values from kring */ ring->head = kring->rhead; ring->cur = kring->rcur; ring->tail = kring->rtail; *(uint32_t *)(uintptr_t)&ring->nr_buf_size = netmap_mem_bufsize(na->nm_mem); nm_prdis("%s h %d c %d t %d", kring->name, ring->head, ring->cur, ring->tail); nm_prdis("initializing slots for %s_ring", nm_txrx2str(t)); if (!(kring->nr_kflags & NKR_FAKERING)) { /* this is a real ring */ if (netmap_debug & NM_DEBUG_MEM) nm_prinf("allocating buffers for %s", kring->name); if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t)); goto cleanup; } } else { /* this is a fake ring, set all indices to 0 */ if (netmap_debug & NM_DEBUG_MEM) nm_prinf("NOT allocating buffers for %s", kring->name); netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); } /* ring info */ *(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id; *(uint16_t *)(uintptr_t)&ring->dir = kring->tx; } } return 0; cleanup: /* we cannot actually clean up here, since we don't own kring->users * and kring->nr_kflags & NKR_NEEDRING. The caller must decrement * the first or zero-out the second, then call netmap_free_rings() * to do the cleanup */ return ENOMEM; } static void netmap_mem2_rings_delete(struct netmap_adapter *na) { /* last instance, release bufs and rings */ netmap_free_rings(na); } /* call with NMA_LOCK held */ /* * Allocate the per-fd structure netmap_if. * * We assume that the configuration stored in na * (number of tx/rx rings and descs) does not change while * the interface is in netmap mode.
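 *
 * The ring_ofs[] values stored below are what userspace uses to reach the
 * rings: a process adds each offset to its pointer to the netmap_if (this
 * is what the NETMAP_TXRING()/NETMAP_RXRING() macros of net/netmap_user.h
 * reduce to). A standalone model with trimmed-down structures (the demo_*
 * names are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

struct demo_ring { uint32_t head, cur, tail; };
struct demo_if { ssize_t ring_ofs[4]; };

int
main(void)
{
	static union {
		struct demo_if ifhdr;
		char raw[4096];		/* stands in for the mmap()ed region */
	} shm;
	struct demo_if *nifp = &shm.ifhdr;
	struct demo_ring *ring = (struct demo_ring *)(void *)(shm.raw + 256);
	struct demo_ring *r;

	ring->head = 7;
	/* what netmap_mem2_if_new() stores ... */
	nifp->ring_ofs[0] = (char *)ring - (char *)nifp;
	/* ... and how userspace gets the ring back */
	r = (struct demo_ring *)(void *)((char *)nifp + nifp->ring_ofs[0]);
	printf("head = %u\n", r->head);	/* prints 7 */
	return (0);
}
#endif
/*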
*/ static struct netmap_if * netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) { struct netmap_if *nifp; ssize_t base; /* handy for relative offsets between rings and nifp */ u_int i, len, n[NR_TXRX], ntot; enum txrx t; ntot = 0; for_rx_tx(t) { /* account for the (eventually fake) host rings */ n[t] = netmap_all_rings(na, t); ntot += n[t]; } /* * the descriptor is followed inline by an array of offsets * to the tx and rx rings in the shared memory region. */ len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t)); nifp = netmap_if_malloc(na->nm_mem, len); if (nifp == NULL) { NMA_UNLOCK(na->nm_mem); return NULL; } /* initialize base fields -- override const */ *(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings; *(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings; *(u_int *)(uintptr_t)&nifp->ni_host_tx_rings = (na->num_host_tx_rings ? na->num_host_tx_rings : 1); *(u_int *)(uintptr_t)&nifp->ni_host_rx_rings = (na->num_host_rx_rings ? na->num_host_rx_rings : 1); strlcpy(nifp->ni_name, na->name, sizeof(nifp->ni_name)); /* * fill the slots for the rx and tx rings. They contain the offset * between the ring and nifp, so the information is usable in * userspace to reach the ring from the nifp. */ base = netmap_if_offset(na->nm_mem, nifp); for (i = 0; i < n[NR_TX]; i++) { /* XXX instead of ofs == 0 maybe use the offset of an error * ring, like we do for buffers? */ ssize_t ofs = 0; if (na->tx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_TX] && i < priv->np_qlast[NR_TX]) { ofs = netmap_ring_offset(na->nm_mem, na->tx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs; } for (i = 0; i < n[NR_RX]; i++) { /* XXX instead of ofs == 0 maybe use the offset of an error * ring, like we do for buffers? 
*/ ssize_t ofs = 0; if (na->rx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_RX] && i < priv->np_qlast[NR_RX]) { ofs = netmap_ring_offset(na->nm_mem, na->rx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs; } return (nifp); } static void netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) { if (nifp == NULL) /* nothing to do */ return; if (nifp->ni_bufs_head) netmap_extra_free(na, nifp->ni_bufs_head); netmap_if_free(na->nm_mem, nifp); } static void netmap_mem2_deref(struct netmap_mem_d *nmd) { if (netmap_debug & NM_DEBUG_MEM) nm_prinf("active = %d", nmd->active); } struct netmap_mem_ops netmap_mem_global_ops = { .nmd_get_lut = netmap_mem2_get_lut, .nmd_get_info = netmap_mem2_get_info, .nmd_ofstophys = netmap_mem2_ofstophys, .nmd_config = netmap_mem2_config, .nmd_finalize = netmap_mem2_finalize, .nmd_deref = netmap_mem2_deref, .nmd_delete = netmap_mem2_delete, .nmd_if_offset = netmap_mem2_if_offset, .nmd_if_new = netmap_mem2_if_new, .nmd_if_delete = netmap_mem2_if_delete, .nmd_rings_create = netmap_mem2_rings_create, .nmd_rings_delete = netmap_mem2_rings_delete }; int netmap_mem_pools_info_get(struct nmreq_pools_info *req, struct netmap_mem_d *nmd) { int ret; ret = netmap_mem_get_info(nmd, &req->nr_memsize, NULL, &req->nr_mem_id); if (ret) { return ret; } NMA_LOCK(nmd); req->nr_if_pool_offset = 0; req->nr_if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal; req->nr_if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize; req->nr_ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal; req->nr_ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal; req->nr_ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize; req->nr_buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + nmd->pools[NETMAP_RING_POOL].memtotal; req->nr_buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; req->nr_buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; NMA_UNLOCK(nmd); return 0; } #ifdef WITH_EXTMEM struct netmap_mem_ext { struct netmap_mem_d up; struct nm_os_extmem *os; struct netmap_mem_ext *next, *prev; }; /* call with nm_mem_list_lock held */ static void netmap_mem_ext_register(struct netmap_mem_ext *e) { NM_MTX_LOCK(nm_mem_ext_list_lock); if (netmap_mem_ext_list) netmap_mem_ext_list->prev = e; e->next = netmap_mem_ext_list; netmap_mem_ext_list = e; e->prev = NULL; NM_MTX_UNLOCK(nm_mem_ext_list_lock); } /* call with nm_mem_list_lock held */ static void netmap_mem_ext_unregister(struct netmap_mem_ext *e) { if (e->prev) e->prev->next = e->next; else netmap_mem_ext_list = e->next; if (e->next) e->next->prev = e->prev; e->prev = e->next = NULL; } static struct netmap_mem_ext * netmap_mem_ext_search(struct nm_os_extmem *os) { struct netmap_mem_ext *e; NM_MTX_LOCK(nm_mem_ext_list_lock); for (e = netmap_mem_ext_list; e; e = e->next) { if (nm_os_extmem_isequal(e->os, os)) { netmap_mem_get(&e->up); break; } } NM_MTX_UNLOCK(nm_mem_ext_list_lock); return e; } static void netmap_mem_ext_delete(struct netmap_mem_d *d) { int i; struct netmap_mem_ext *e = (struct netmap_mem_ext *)d; netmap_mem_ext_unregister(e); for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &d->pools[i]; if (p->lut) { nm_free_lut(p->lut, p->objtotal); p->lut = NULL; } } if (e->os) nm_os_extmem_delete(e->os); netmap_mem2_delete(d); } static int netmap_mem_ext_config(struct netmap_mem_d *nmd) { return 0; } struct netmap_mem_ops netmap_mem_ext_ops = { .nmd_get_lut = netmap_mem2_get_lut, .nmd_get_info = netmap_mem2_get_info, .nmd_ofstophys = netmap_mem2_ofstophys, 
.nmd_config = netmap_mem_ext_config, .nmd_finalize = netmap_mem2_finalize, .nmd_deref = netmap_mem2_deref, .nmd_delete = netmap_mem_ext_delete, .nmd_if_offset = netmap_mem2_if_offset, .nmd_if_new = netmap_mem2_if_new, .nmd_if_delete = netmap_mem2_if_delete, .nmd_rings_create = netmap_mem2_rings_create, .nmd_rings_delete = netmap_mem2_rings_delete }; struct netmap_mem_d * netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror) { int error = 0; int i, j; struct netmap_mem_ext *nme; char *clust; size_t off; struct nm_os_extmem *os = NULL; int nr_pages; // XXX sanity checks if (pi->nr_if_pool_objtotal == 0) pi->nr_if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; if (pi->nr_if_pool_objsize == 0) pi->nr_if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; if (pi->nr_ring_pool_objtotal == 0) pi->nr_ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num; if (pi->nr_ring_pool_objsize == 0) pi->nr_ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; if (pi->nr_buf_pool_objtotal == 0) pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; if (pi->nr_buf_pool_objsize == 0) pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; if (netmap_verbose & NM_DEBUG_MEM) nm_prinf("if %d %d ring %d %d buf %d %d", pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize, pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize, pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize); os = nm_os_extmem_create(usrptr, pi, &error); if (os == NULL) { nm_prerr("os extmem creation failed"); goto out; } nme = netmap_mem_ext_search(os); if (nme) { nm_os_extmem_delete(os); return &nme->up; } if (netmap_verbose & NM_DEBUG_MEM) nm_prinf("not found, creating new"); nme = _netmap_mem_private_new(sizeof(*nme), (struct netmap_obj_params[]){ { pi->nr_if_pool_objsize, pi->nr_if_pool_objtotal }, { pi->nr_ring_pool_objsize, pi->nr_ring_pool_objtotal }, { pi->nr_buf_pool_objsize, pi->nr_buf_pool_objtotal }}, &netmap_mem_ext_ops, &error); if (nme == NULL) goto out_unmap; nr_pages = nm_os_extmem_nr_pages(os); /* from now on pages will be released by nme destructor; * we let res = 0 to prevent release in out_unmap below */ nme->os = os; os = NULL; /* pass ownership */ clust = nm_os_extmem_nextpage(nme->os); off = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nme->up.pools[i]; struct netmap_obj_params *o = &nme->up.params[i]; p->_objsize = o->size; p->_clustsize = o->size; p->_clustentries = 1; p->lut = nm_alloc_lut(o->num); if (p->lut == NULL) { error = ENOMEM; goto out_delete; } p->bitmap_slots = (o->num + sizeof(uint32_t) - 1) / sizeof(uint32_t); p->invalid_bitmap = nm_os_malloc(sizeof(uint32_t) * p->bitmap_slots); if (p->invalid_bitmap == NULL) { error = ENOMEM; goto out_delete; } if (nr_pages == 0) { p->objtotal = 0; p->memtotal = 0; p->objfree = 0; continue; } for (j = 0; j < o->num && nr_pages > 0; j++) { size_t noff; p->lut[j].vaddr = clust + off; #if !defined(linux) && !defined(_WIN32) p->lut[j].paddr = vtophys(p->lut[j].vaddr); #endif nm_prdis("%s %d at %p", p->name, j, p->lut[j].vaddr); noff = off + p->_objsize; if (noff < PAGE_SIZE) { off = noff; continue; } nm_prdis("too big, recomputing offset..."); while (noff >= PAGE_SIZE) { char *old_clust = clust; noff -= PAGE_SIZE; clust = nm_os_extmem_nextpage(nme->os); nr_pages--; nm_prdis("noff %zu page %p nr_pages %d", noff, page_to_virt(*pages), nr_pages); if (noff > 0 && !nm_isset(p->invalid_bitmap, j) && (nr_pages == 0 || old_clust + PAGE_SIZE != clust)) { /* out 
of space or non contiguous, * drop this object * */ p->invalid_bitmap[ (j>>5) ] |= 1U << (j & 31U); nm_prdis("non contiguous at off %zu, drop", noff); } if (nr_pages == 0) break; } off = noff; } p->objtotal = j; p->numclusters = p->objtotal; p->memtotal = j * (size_t)p->_objsize; nm_prdis("%d memtotal %zu", j, p->memtotal); } netmap_mem_ext_register(nme); return &nme->up; out_delete: netmap_mem_put(&nme->up); out_unmap: if (os) nm_os_extmem_delete(os); out: if (perror) *perror = error; return NULL; } #endif /* WITH_EXTMEM */ #ifdef WITH_PTNETMAP struct mem_pt_if { struct mem_pt_if *next; struct ifnet *ifp; unsigned int nifp_offset; }; /* Netmap allocator for ptnetmap guests. */ struct netmap_mem_ptg { struct netmap_mem_d up; vm_paddr_t nm_paddr; /* physical address in the guest */ void *nm_addr; /* virtual address in the guest */ struct netmap_lut buf_lut; /* lookup table for BUF pool in the guest */ nm_memid_t host_mem_id; /* allocator identifier in the host */ struct ptnetmap_memdev *ptn_dev;/* ptnetmap memdev */ struct mem_pt_if *pt_ifs; /* list of interfaces in passthrough */ }; /* Link a passthrough interface to a passthrough netmap allocator. */ static int netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp, unsigned int nifp_offset) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; struct mem_pt_if *ptif = nm_os_malloc(sizeof(*ptif)); if (!ptif) { return ENOMEM; } NMA_LOCK(nmd); ptif->ifp = ifp; ptif->nifp_offset = nifp_offset; if (ptnmd->pt_ifs) { ptif->next = ptnmd->pt_ifs; } ptnmd->pt_ifs = ptif; NMA_UNLOCK(nmd); nm_prinf("ifp=%s,nifp_offset=%u", ptif->ifp->if_xname, ptif->nifp_offset); return 0; } /* Called with NMA_LOCK(nmd) held. */ static struct mem_pt_if * netmap_mem_pt_guest_ifp_lookup(struct netmap_mem_d *nmd, struct ifnet *ifp) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; struct mem_pt_if *curr; for (curr = ptnmd->pt_ifs; curr; curr = curr->next) { if (curr->ifp == ifp) { return curr; } } return NULL; } /* Unlink a passthrough interface from a passthrough netmap allocator. 
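 *
 * The function below performs a classic singly-linked-list unlink, with a
 * 'prev' pointer trailing 'curr' through the pt_ifs chain. A standalone
 * model of the same walk (the demo_* names are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stddef.h>

struct demo_node { struct demo_node *next; int key; };

static int
demo_unlink(struct demo_node **head, int key)
{
	struct demo_node *prev = NULL, *curr;

	for (curr = *head; curr != NULL; prev = curr, curr = curr->next) {
		if (curr->key != key)
			continue;
		if (prev != NULL)
			prev->next = curr->next;	/* bypass curr */
		else
			*head = curr->next;		/* curr was the head */
		return (0);
	}
	return (-1);	/* not found, mirrors the ret = -1 default below */
}
#endif
/*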
*/ int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; struct mem_pt_if *prev = NULL; struct mem_pt_if *curr; int ret = -1; NMA_LOCK(nmd); for (curr = ptnmd->pt_ifs; curr; curr = curr->next) { if (curr->ifp == ifp) { if (prev) { prev->next = curr->next; } else { ptnmd->pt_ifs = curr->next; } nm_prinf("removed (ifp=%s,nifp_offset=%u)", curr->ifp->if_xname, curr->nifp_offset); nm_os_free(curr); ret = 0; break; } prev = curr; } NMA_UNLOCK(nmd); return ret; } static int netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; if (!(nmd->flags & NETMAP_MEM_FINALIZED)) { return EINVAL; } *lut = ptnmd->buf_lut; return 0; } static int netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size, u_int *memflags, uint16_t *id) { int error = 0; error = nmd->ops->nmd_config(nmd); if (error) goto out; if (size) *size = nmd->nm_totalsize; if (memflags) *memflags = nmd->flags; if (id) *id = nmd->nm_id; out: return error; } static vm_paddr_t netmap_mem_pt_guest_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; vm_paddr_t paddr; /* if the offset is valid, just return csb->base_addr + off */ paddr = (vm_paddr_t)(ptnmd->nm_paddr + off); nm_prdis("off %lx padr %lx", off, (unsigned long)paddr); return paddr; } static int netmap_mem_pt_guest_config(struct netmap_mem_d *nmd) { /* nothing to do, we are configured on creation * and configuration never changes thereafter */ return 0; } static int netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; uint64_t mem_size; uint32_t bufsize; uint32_t nbuffers; uint32_t poolofs; vm_paddr_t paddr; char *vaddr; int i; int error = 0; if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; if (ptnmd->ptn_dev == NULL) { nm_prerr("ptnetmap memdev not attached"); error = ENOMEM; goto out; } /* Map memory through ptnetmap-memdev BAR. */ error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr, &ptnmd->nm_addr, &mem_size); if (error) goto out; /* Initialize the lut using the information contained in the * ptnetmap memory device. */ bufsize = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, PTNET_MDEV_IO_BUF_POOL_OBJSZ); nbuffers = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, PTNET_MDEV_IO_BUF_POOL_OBJNUM); /* allocate the lut */ if (ptnmd->buf_lut.lut == NULL) { nm_prinf("allocating lut"); ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers); if (ptnmd->buf_lut.lut == NULL) { nm_prerr("lut allocation failed"); return ENOMEM; } } /* we have physically contiguous memory mapped through PCI BAR */ poolofs = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, PTNET_MDEV_IO_BUF_POOL_OFS); vaddr = (char *)(ptnmd->nm_addr) + poolofs; paddr = ptnmd->nm_paddr + poolofs; for (i = 0; i < nbuffers; i++) { ptnmd->buf_lut.lut[i].vaddr = vaddr; vaddr += bufsize; paddr += bufsize; } ptnmd->buf_lut.objtotal = nbuffers; ptnmd->buf_lut.objsize = bufsize; nmd->nm_totalsize = mem_size; /* Initialize these fields as are needed by * netmap_mem_bufsize(). * XXX please improve this, why do we need this * replication? maybe we nmd->pools[] should no be * there for the guest allocator? 
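 *
 * (The replication is needed because the generic accessor defined
 * earlier in this file reads the pool fields directly:
 *
 *	unsigned
 *	netmap_mem_bufsize(struct netmap_mem_d *nmd)
 *	{
 *		return nmd->pools[NETMAP_BUF_POOL]._objsize;
 *	}
 *
 * so a guest allocator that filled only buf_lut would report a zero
 * buffer size.)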
*/ nmd->pools[NETMAP_BUF_POOL]._objsize = bufsize; nmd->pools[NETMAP_BUF_POOL]._objtotal = nbuffers; nmd->flags |= NETMAP_MEM_FINALIZED; out: return error; } static void netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; if (nmd->active == 1 && (nmd->flags & NETMAP_MEM_FINALIZED)) { nmd->flags &= ~NETMAP_MEM_FINALIZED; /* unmap ptnetmap-memdev memory */ if (ptnmd->ptn_dev) { nm_os_pt_memdev_iounmap(ptnmd->ptn_dev); } ptnmd->nm_addr = NULL; ptnmd->nm_paddr = 0; } } static ssize_t netmap_mem_pt_guest_if_offset(struct netmap_mem_d *nmd, const void *vaddr) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; return (const char *)(vaddr) - (char *)(ptnmd->nm_addr); } static void netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd) { if (nmd == NULL) return; if (netmap_verbose) nm_prinf("deleting %p", nmd); if (nmd->active > 0) nm_prerr("bug: deleting mem allocator with active=%d!", nmd->active); if (netmap_verbose) nm_prinf("done deleting %p", nmd); NMA_LOCK_DESTROY(nmd); nm_os_free(nmd); } static struct netmap_if * netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem; struct mem_pt_if *ptif; struct netmap_if *nifp = NULL; ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { nm_prerr("interface %s is not in passthrough", na->name); goto out; } nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) + ptif->nifp_offset); out: return nifp; } static void netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) { struct mem_pt_if *ptif; ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { nm_prerr("interface %s is not in passthrough", na->name); } } static int netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem; struct mem_pt_if *ptif; struct netmap_if *nifp; int i, error = -1; ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { nm_prerr("interface %s is not in passthrough", na->name); goto out; } /* point each kring to the corresponding backend ring */ nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset); for (i = 0; i < netmap_all_rings(na, NR_TX); i++) { struct netmap_kring *kring = na->tx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) ((char *)nifp + nifp->ring_ofs[i]); } for (i = 0; i < netmap_all_rings(na, NR_RX); i++) { struct netmap_kring *kring = na->rx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) ((char *)nifp + nifp->ring_ofs[netmap_all_rings(na, NR_TX) + i]); } error = 0; out: return error; } static void netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na) { #if 0 enum txrx t; for_rx_tx(t) { u_int i; for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { struct netmap_kring *kring = &NMR(na, t)[i]; kring->ring = NULL; } } #endif } static struct netmap_mem_ops netmap_mem_pt_guest_ops = { .nmd_get_lut = netmap_mem_pt_guest_get_lut, .nmd_get_info = netmap_mem_pt_guest_get_info, .nmd_ofstophys = netmap_mem_pt_guest_ofstophys, .nmd_config = netmap_mem_pt_guest_config, .nmd_finalize = netmap_mem_pt_guest_finalize, .nmd_deref = netmap_mem_pt_guest_deref, .nmd_if_offset = netmap_mem_pt_guest_if_offset, .nmd_delete = netmap_mem_pt_guest_delete, .nmd_if_new = netmap_mem_pt_guest_if_new, .nmd_if_delete = netmap_mem_pt_guest_if_delete, .nmd_rings_create = 
netmap_mem_pt_guest_rings_create, .nmd_rings_delete = netmap_mem_pt_guest_rings_delete }; /* Called with nm_mem_list_lock held. */ static struct netmap_mem_d * netmap_mem_pt_guest_find_memid(nm_memid_t mem_id) { struct netmap_mem_d *mem = NULL; struct netmap_mem_d *scan = netmap_last_mem_d; do { /* find ptnetmap allocator through host ID */ if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref && ((struct netmap_mem_ptg *)(scan))->host_mem_id == mem_id) { mem = scan; mem->refcount++; NM_DBG_REFC(mem, __FUNCTION__, __LINE__); break; } scan = scan->next; } while (scan != netmap_last_mem_d); return mem; } /* Called with nm_mem_list_lock held. */ static struct netmap_mem_d * netmap_mem_pt_guest_create(nm_memid_t mem_id) { struct netmap_mem_ptg *ptnmd; int err = 0; ptnmd = nm_os_malloc(sizeof(struct netmap_mem_ptg)); if (ptnmd == NULL) { err = ENOMEM; goto error; } ptnmd->up.ops = &netmap_mem_pt_guest_ops; ptnmd->host_mem_id = mem_id; ptnmd->pt_ifs = NULL; /* Assign new id in the guest (We have the lock) */ err = nm_mem_assign_id_locked(&ptnmd->up); if (err) goto error; ptnmd->up.flags &= ~NETMAP_MEM_FINALIZED; ptnmd->up.flags |= NETMAP_MEM_IO; NMA_LOCK_INIT(&ptnmd->up); snprintf(ptnmd->up.name, NM_MEM_NAMESZ, "%d", ptnmd->up.nm_id); return &ptnmd->up; error: netmap_mem_pt_guest_delete(&ptnmd->up); return NULL; } /* * find host id in guest allocators and create guest allocator * if it is not there */ static struct netmap_mem_d * netmap_mem_pt_guest_get(nm_memid_t mem_id) { struct netmap_mem_d *nmd; NM_MTX_LOCK(nm_mem_list_lock); nmd = netmap_mem_pt_guest_find_memid(mem_id); if (nmd == NULL) { nmd = netmap_mem_pt_guest_create(mem_id); } NM_MTX_UNLOCK(nm_mem_list_lock); return nmd; } /* * The guest allocator can be created by ptnetmap_memdev (during the device * attach) or by ptnetmap device (ptnet), during the netmap_attach. * * The order is not important (we have different order in LINUX and FreeBSD). * The first one, creates the device, and the second one simply attaches it. */ /* Called when ptnetmap_memdev is attaching, to attach a new allocator in * the guest */ struct netmap_mem_d * netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t mem_id) { struct netmap_mem_d *nmd; struct netmap_mem_ptg *ptnmd; nmd = netmap_mem_pt_guest_get(mem_id); /* assign this device to the guest allocator */ if (nmd) { ptnmd = (struct netmap_mem_ptg *)nmd; ptnmd->ptn_dev = ptn_dev; } return nmd; } /* Called when ptnet device is attaching */ struct netmap_mem_d * netmap_mem_pt_guest_new(struct ifnet *ifp, unsigned int nifp_offset, unsigned int memid) { struct netmap_mem_d *nmd; if (ifp == NULL) { return NULL; } nmd = netmap_mem_pt_guest_get((nm_memid_t)memid); if (nmd) { netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset); } return nmd; } #endif /* WITH_PTNETMAP */ Index: projects/clang1000-import/sys/dev/sound/pci/emu10k1.c =================================================================== --- projects/clang1000-import/sys/dev/sound/pci/emu10k1.c (revision 357178) +++ projects/clang1000-import/sys/dev/sound/pci/emu10k1.c (revision 357179) @@ -1,2258 +1,2259 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 David O'Brien * Copyright (c) 2003 Orlando Bassotto * Copyright (c) 1999 Cameron Grant * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHERIN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_snd.h" #endif #include #include #include #include #include #include #include #include "mpufoi_if.h" SND_DECLARE_FILE("$FreeBSD$"); /* -------------------------------------------------------------------- */ #define NUM_G 64 /* use all channels */ #define WAVEOUT_MAXBUFSIZE 32768 #define EMUPAGESIZE 4096 /* don't change */ #define EMUMAXPAGES (WAVEOUT_MAXBUFSIZE * NUM_G / EMUPAGESIZE) #define EMU10K1_PCI_ID 0x00021102 /* 1102 => Creative Labs Vendor ID */ #define EMU10K2_PCI_ID 0x00041102 #define EMU10K3_PCI_ID 0x00081102 #define EMU_DEFAULT_BUFSZ 4096 #define EMU_MAX_CHANS 8 #define EMU_CHANS 4 #define MAXREQVOICES 8 #define RESERVED 0 #define NUM_MIDI 16 #define NUM_FXSENDS 4 #define TMEMSIZE 256*1024 #define TMEMSIZEREG 4 #define ENABLE 0xffffffff #define DISABLE 0x00000000 #define ENV_ON EMU_CHAN_DCYSUSV_CHANNELENABLE_MASK #define ENV_OFF 0x00 /* XXX: should this be 1? 
*/ #define EMU_A_IOCFG_GPOUT_A 0x40 #define EMU_A_IOCFG_GPOUT_D 0x04 #define EMU_A_IOCFG_GPOUT_AD (EMU_A_IOCFG_GPOUT_A|EMU_A_IOCFG_GPOUT_D) /* EMU_A_IOCFG_GPOUT0 */ #define EMU_HCFG_GPOUT1 0x00000800 /* instruction set */ #define iACC3 0x06 #define iMACINT0 0x04 #define iINTERP 0x0e #define C_00000000 0x40 #define C_00000001 0x41 #define C_00000004 0x44 #define C_40000000 0x4d /* Audigy constants */ #define A_C_00000000 0xc0 #define A_C_40000000 0xcd /* GPRs */ #define FXBUS(x) (0x00 + (x)) #define EXTIN(x) (0x10 + (x)) #define EXTOUT(x) (0x20 + (x)) #define GPR(x) (EMU_FXGPREGBASE + (x)) #define A_EXTIN(x) (0x40 + (x)) #define A_FXBUS(x) (0x00 + (x)) #define A_EXTOUT(x) (0x60 + (x)) #define A_GPR(x) (EMU_A_FXGPREGBASE + (x)) /* FX buses */ #define FXBUS_PCM_LEFT 0x00 #define FXBUS_PCM_RIGHT 0x01 #define FXBUS_MIDI_LEFT 0x04 #define FXBUS_MIDI_RIGHT 0x05 #define FXBUS_MIDI_REVERB 0x0c #define FXBUS_MIDI_CHORUS 0x0d /* Inputs */ #define EXTIN_AC97_L 0x00 #define EXTIN_AC97_R 0x01 #define EXTIN_SPDIF_CD_L 0x02 #define EXTIN_SPDIF_CD_R 0x03 #define EXTIN_TOSLINK_L 0x06 #define EXTIN_TOSLINK_R 0x07 #define EXTIN_COAX_SPDIF_L 0x0a #define EXTIN_COAX_SPDIF_R 0x0b /* Audigy Inputs */ #define A_EXTIN_AC97_L 0x00 #define A_EXTIN_AC97_R 0x01 /* Outputs */ #define EXTOUT_AC97_L 0x00 #define EXTOUT_AC97_R 0x01 #define EXTOUT_TOSLINK_L 0x02 #define EXTOUT_TOSLINK_R 0x03 #define EXTOUT_AC97_CENTER 0x04 #define EXTOUT_AC97_LFE 0x05 #define EXTOUT_HEADPHONE_L 0x06 #define EXTOUT_HEADPHONE_R 0x07 #define EXTOUT_REAR_L 0x08 #define EXTOUT_REAR_R 0x09 #define EXTOUT_ADC_CAP_L 0x0a #define EXTOUT_ADC_CAP_R 0x0b #define EXTOUT_ACENTER 0x11 #define EXTOUT_ALFE 0x12 /* Audigy Outputs */ #define A_EXTOUT_FRONT_L 0x00 #define A_EXTOUT_FRONT_R 0x01 #define A_EXTOUT_CENTER 0x02 #define A_EXTOUT_LFE 0x03 #define A_EXTOUT_HEADPHONE_L 0x04 #define A_EXTOUT_HEADPHONE_R 0x05 #define A_EXTOUT_REAR_L 0x06 #define A_EXTOUT_REAR_R 0x07 #define A_EXTOUT_AFRONT_L 0x08 #define A_EXTOUT_AFRONT_R 0x09 #define A_EXTOUT_ACENTER 0x0a #define A_EXTOUT_ALFE 0x0b #define A_EXTOUT_AREAR_L 0x0e #define A_EXTOUT_AREAR_R 0x0f #define A_EXTOUT_AC97_L 0x10 #define A_EXTOUT_AC97_R 0x11 #define A_EXTOUT_ADC_CAP_L 0x16 #define A_EXTOUT_ADC_CAP_R 0x17 struct emu_memblk { SLIST_ENTRY(emu_memblk) link; void *buf; bus_addr_t buf_addr; u_int32_t pte_start, pte_size; bus_dmamap_t buf_map; }; struct emu_mem { u_int8_t bmap[EMUMAXPAGES / 8]; u_int32_t *ptb_pages; void *silent_page; bus_addr_t silent_page_addr; bus_addr_t ptb_pages_addr; bus_dmamap_t ptb_map; bus_dmamap_t silent_map; SLIST_HEAD(, emu_memblk) blocks; }; struct emu_voice { int vnum; unsigned int b16:1, stereo:1, busy:1, running:1, ismaster:1; int speed; int start, end, vol; int fxrt1; /* FX routing */ int fxrt2; /* FX routing (only for audigy) */ u_int32_t buf; struct emu_voice *slave; struct pcm_channel *channel; }; struct sc_info; /* channel registers */ struct sc_pchinfo { int spd, fmt, blksz, run; struct emu_voice *master, *slave; struct snd_dbuf *buffer; struct pcm_channel *channel; struct sc_info *parent; }; struct sc_rchinfo { int spd, fmt, run, blksz, num; u_int32_t idxreg, basereg, sizereg, setupreg, irqmask; struct snd_dbuf *buffer; struct pcm_channel *channel; struct sc_info *parent; }; /* device private data */ struct sc_info { device_t dev; u_int32_t type, rev; u_int32_t tos_link:1, APS:1, audigy:1, audigy2:1; u_int32_t addrmask; /* wider if audigy */ bus_space_tag_t st; bus_space_handle_t sh; bus_dma_tag_t parent_dmat; struct resource *reg, *irq; void *ih; struct mtx *lock; 
unsigned int bufsz; int timer, timerinterval; int pnum, rnum; int nchans; struct emu_mem mem; struct emu_voice voice[64]; struct sc_pchinfo pch[EMU_MAX_CHANS]; struct sc_rchinfo rch[3]; struct mpu401 *mpu; mpu401_intr_t *mpu_intr; int mputx; }; /* -------------------------------------------------------------------- */ /* * prototypes */ /* stuff */ static int emu_init(struct sc_info *); static void emu_intr(void *); static void *emu_malloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr, bus_dmamap_t *map); static void *emu_memalloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr); static int emu_memfree(struct sc_info *sc, void *buf); static int emu_memstart(struct sc_info *sc, void *buf); #ifdef EMUDEBUG static void emu_vdump(struct sc_info *sc, struct emu_voice *v); #endif /* talk to the card */ static u_int32_t emu_rd(struct sc_info *, int, int); static void emu_wr(struct sc_info *, int, u_int32_t, int); /* -------------------------------------------------------------------- */ static u_int32_t emu_rfmt_ac97[] = { SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static u_int32_t emu_rfmt_mic[] = { SND_FORMAT(AFMT_U8, 1, 0), 0 }; static u_int32_t emu_rfmt_efx[] = { SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static struct pcmchan_caps emu_reccaps[3] = { {8000, 48000, emu_rfmt_ac97, 0}, {8000, 8000, emu_rfmt_mic, 0}, {48000, 48000, emu_rfmt_efx, 0}, }; static u_int32_t emu_pfmt[] = { SND_FORMAT(AFMT_U8, 1, 0), SND_FORMAT(AFMT_U8, 2, 0), SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static struct pcmchan_caps emu_playcaps = {4000, 48000, emu_pfmt, 0}; static int adcspeed[8] = {48000, 44100, 32000, 24000, 22050, 16000, 11025, 8000}; /* audigy supports 12kHz. */ static int audigy_adcspeed[9] = { 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000 }; /* -------------------------------------------------------------------- */ /* Hardware */ static u_int32_t emu_rd(struct sc_info *sc, int regno, int size) { switch (size) { case 1: return bus_space_read_1(sc->st, sc->sh, regno); case 2: return bus_space_read_2(sc->st, sc->sh, regno); case 4: return bus_space_read_4(sc->st, sc->sh, regno); default: return 0xffffffff; } } static void emu_wr(struct sc_info *sc, int regno, u_int32_t data, int size) { switch (size) { case 1: bus_space_write_1(sc->st, sc->sh, regno, data); break; case 2: bus_space_write_2(sc->st, sc->sh, regno, data); break; case 4: bus_space_write_4(sc->st, sc->sh, regno, data); break; } } static u_int32_t emu_rdptr(struct sc_info *sc, int chn, int reg) { u_int32_t ptr, val, mask, size, offset; ptr = ((reg << 16) & sc->addrmask) | (chn & EMU_PTR_CHNO_MASK); emu_wr(sc, EMU_PTR, ptr, 4); val = emu_rd(sc, EMU_DATA, 4); if (reg & 0xff000000) { size = (reg >> 24) & 0x3f; offset = (reg >> 16) & 0x1f; mask = ((1 << size) - 1) << offset; val &= mask; val >>= offset; } return val; } static void emu_wrptr(struct sc_info *sc, int chn, int reg, u_int32_t data) { u_int32_t ptr, mask, size, offset; ptr = ((reg << 16) & sc->addrmask) | (chn & EMU_PTR_CHNO_MASK); emu_wr(sc, EMU_PTR, ptr, 4); if (reg & 0xff000000) { size = (reg >> 24) & 0x3f; offset = (reg >> 16) & 0x1f; mask = ((1 << size) - 1) << offset; data <<= offset; data &= mask; data |= emu_rd(sc, EMU_DATA, 4) & ~mask; } emu_wr(sc, EMU_DATA, data, 4); } static void emu_wrefx(struct sc_info *sc, unsigned int pc, unsigned int data) { pc += sc->audigy ? 
EMU_A_MICROCODEBASE : EMU_MICROCODEBASE; emu_wrptr(sc, 0, pc, data); } /* -------------------------------------------------------------------- */ /* ac97 codec */ /* no locking needed */ static int emu_rdcd(kobj_t obj, void *devinfo, int regno) { struct sc_info *sc = (struct sc_info *)devinfo; emu_wr(sc, EMU_AC97ADDR, regno, 1); return emu_rd(sc, EMU_AC97DATA, 2); } static int emu_wrcd(kobj_t obj, void *devinfo, int regno, u_int32_t data) { struct sc_info *sc = (struct sc_info *)devinfo; emu_wr(sc, EMU_AC97ADDR, regno, 1); emu_wr(sc, EMU_AC97DATA, data, 2); return 0; } static kobj_method_t emu_ac97_methods[] = { KOBJMETHOD(ac97_read, emu_rdcd), KOBJMETHOD(ac97_write, emu_wrcd), KOBJMETHOD_END }; AC97_DECLARE(emu_ac97); /* -------------------------------------------------------------------- */ /* stuff */ static int emu_settimer(struct sc_info *sc) { struct sc_pchinfo *pch; struct sc_rchinfo *rch; int i, tmp, rate; rate = 0; for (i = 0; i < sc->nchans; i++) { pch = &sc->pch[i]; if (pch->buffer) { tmp = (pch->spd * sndbuf_getalign(pch->buffer)) / pch->blksz; if (tmp > rate) rate = tmp; } } for (i = 0; i < 3; i++) { rch = &sc->rch[i]; if (rch->buffer) { tmp = (rch->spd * sndbuf_getalign(rch->buffer)) / rch->blksz; if (tmp > rate) rate = tmp; } } RANGE(rate, 48, 9600); sc->timerinterval = 48000 / rate; emu_wr(sc, EMU_TIMER, sc->timerinterval & 0x03ff, 2); return sc->timerinterval; } static int emu_enatimer(struct sc_info *sc, int go) { u_int32_t x; if (go) { if (sc->timer++ == 0) { x = emu_rd(sc, EMU_INTE, 4); x |= EMU_INTE_INTERTIMERENB; emu_wr(sc, EMU_INTE, x, 4); } } else { sc->timer = 0; x = emu_rd(sc, EMU_INTE, 4); x &= ~EMU_INTE_INTERTIMERENB; emu_wr(sc, EMU_INTE, x, 4); } return 0; } static void emu_enastop(struct sc_info *sc, char channel, int enable) { int reg = (channel & 0x20) ? 
EMU_SOLEH : EMU_SOLEL; channel &= 0x1f; reg |= 1 << 24; reg |= channel << 16; emu_wrptr(sc, 0, reg, enable); } static int emu_recval(int speed) { int val; val = 0; while (val < 7 && speed < adcspeed[val]) val++; return val; } static int audigy_recval(int speed) { int val; val = 0; while (val < 8 && speed < audigy_adcspeed[val]) val++; return val; } static u_int32_t emu_rate_to_pitch(u_int32_t rate) { static u_int32_t logMagTable[128] = { 0x00000, 0x02dfc, 0x05b9e, 0x088e6, 0x0b5d6, 0x0e26f, 0x10eb3, 0x13aa2, 0x1663f, 0x1918a, 0x1bc84, 0x1e72e, 0x2118b, 0x23b9a, 0x2655d, 0x28ed5, 0x2b803, 0x2e0e8, 0x30985, 0x331db, 0x359eb, 0x381b6, 0x3a93d, 0x3d081, 0x3f782, 0x41e42, 0x444c1, 0x46b01, 0x49101, 0x4b6c4, 0x4dc49, 0x50191, 0x5269e, 0x54b6f, 0x57006, 0x59463, 0x5b888, 0x5dc74, 0x60029, 0x623a7, 0x646ee, 0x66a00, 0x68cdd, 0x6af86, 0x6d1fa, 0x6f43c, 0x7164b, 0x73829, 0x759d4, 0x77b4f, 0x79c9a, 0x7bdb5, 0x7dea1, 0x7ff5e, 0x81fed, 0x8404e, 0x86082, 0x88089, 0x8a064, 0x8c014, 0x8df98, 0x8fef1, 0x91e20, 0x93d26, 0x95c01, 0x97ab4, 0x9993e, 0x9b79f, 0x9d5d9, 0x9f3ec, 0xa11d8, 0xa2f9d, 0xa4d3c, 0xa6ab5, 0xa8808, 0xaa537, 0xac241, 0xadf26, 0xafbe7, 0xb1885, 0xb3500, 0xb5157, 0xb6d8c, 0xb899f, 0xba58f, 0xbc15e, 0xbdd0c, 0xbf899, 0xc1404, 0xc2f50, 0xc4a7b, 0xc6587, 0xc8073, 0xc9b3f, 0xcb5ed, 0xcd07c, 0xceaec, 0xd053f, 0xd1f73, 0xd398a, 0xd5384, 0xd6d60, 0xd8720, 0xda0c3, 0xdba4a, 0xdd3b4, 0xded03, 0xe0636, 0xe1f4e, 0xe384a, 0xe512c, 0xe69f3, 0xe829f, 0xe9b31, 0xeb3a9, 0xecc08, 0xee44c, 0xefc78, 0xf148a, 0xf2c83, 0xf4463, 0xf5c2a, 0xf73da, 0xf8b71, 0xfa2f0, 0xfba57, 0xfd1a7, 0xfe8df }; static char logSlopeTable[128] = { 0x5c, 0x5c, 0x5b, 0x5a, 0x5a, 0x59, 0x58, 0x58, 0x57, 0x56, 0x56, 0x55, 0x55, 0x54, 0x53, 0x53, 0x52, 0x52, 0x51, 0x51, 0x50, 0x50, 0x4f, 0x4f, 0x4e, 0x4d, 0x4d, 0x4d, 0x4c, 0x4c, 0x4b, 0x4b, 0x4a, 0x4a, 0x49, 0x49, 0x48, 0x48, 0x47, 0x47, 0x47, 0x46, 0x46, 0x45, 0x45, 0x45, 0x44, 0x44, 0x43, 0x43, 0x43, 0x42, 0x42, 0x42, 0x41, 0x41, 0x41, 0x40, 0x40, 0x40, 0x3f, 0x3f, 0x3f, 0x3e, 0x3e, 0x3e, 0x3d, 0x3d, 0x3d, 0x3c, 0x3c, 0x3c, 0x3b, 0x3b, 0x3b, 0x3b, 0x3a, 0x3a, 0x3a, 0x39, 0x39, 0x39, 0x39, 0x38, 0x38, 0x38, 0x38, 0x37, 0x37, 0x37, 0x37, 0x36, 0x36, 0x36, 0x36, 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34, 0x34, 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32, 0x32, 0x31, 0x31, 0x31, 0x31, 0x31, 0x30, 0x30, 0x30, 0x30, 0x30, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f }; int i; if (rate == 0) return 0; /* Bail out if no leading "1" */ rate *= 11185; /* Scale 48000 to 0x20002380 */ for (i = 31; i > 0; i--) { if (rate & 0x80000000) { /* Detect leading "1" */ return (((u_int32_t) (i - 15) << 20) + logMagTable[0x7f & (rate >> 24)] + (0x7f & (rate >> 17)) * logSlopeTable[0x7f & (rate >> 24)]); } rate <<= 1; } return 0; /* Should never reach this point */ } static u_int32_t emu_rate_to_linearpitch(u_int32_t rate) { rate = (rate << 8) / 375; return (rate >> 1) + (rate & 1); } static struct emu_voice * emu_valloc(struct sc_info *sc) { struct emu_voice *v; int i; v = NULL; for (i = 0; i < 64 && sc->voice[i].busy; i++); if (i < 64) { v = &sc->voice[i]; v->busy = 1; } return v; } static int emu_vinit(struct sc_info *sc, struct emu_voice *m, struct emu_voice *s, u_int32_t sz, struct snd_dbuf *b) { void *buf; bus_addr_t tmp_addr; buf = emu_memalloc(sc, sz, &tmp_addr); if (buf == NULL) return -1; if (b != NULL) sndbuf_setup(b, buf, sz); m->start = emu_memstart(sc, buf) * EMUPAGESIZE; m->end = m->start + sz; m->channel = NULL; m->speed = 0; m->b16 = 0; m->stereo = 0; m->running = 0; m->ismaster = 1; m->vol = 0xff; 
m->buf = tmp_addr; m->slave = s; if (sc->audigy) { m->fxrt1 = FXBUS_MIDI_CHORUS | FXBUS_PCM_RIGHT << 8 | FXBUS_PCM_LEFT << 16 | FXBUS_MIDI_REVERB << 24; m->fxrt2 = 0x3f3f3f3f; /* No effects on second route */ } else { m->fxrt1 = FXBUS_MIDI_CHORUS | FXBUS_PCM_RIGHT << 4 | FXBUS_PCM_LEFT << 8 | FXBUS_MIDI_REVERB << 12; m->fxrt2 = 0; } if (s != NULL) { s->start = m->start; s->end = m->end; s->channel = NULL; s->speed = 0; s->b16 = 0; s->stereo = 0; s->running = 0; s->ismaster = 0; s->vol = m->vol; s->buf = m->buf; s->fxrt1 = m->fxrt1; s->fxrt2 = m->fxrt2; s->slave = NULL; } return 0; } static void emu_vsetup(struct sc_pchinfo *ch) { struct emu_voice *v = ch->master; if (ch->fmt) { v->b16 = (ch->fmt & AFMT_16BIT) ? 1 : 0; v->stereo = (AFMT_CHANNEL(ch->fmt) > 1) ? 1 : 0; if (v->slave != NULL) { v->slave->b16 = v->b16; v->slave->stereo = v->stereo; } } if (ch->spd) { v->speed = ch->spd; if (v->slave != NULL) v->slave->speed = v->speed; } } static void emu_vwrite(struct sc_info *sc, struct emu_voice *v) { int s; int l, r, x, y; u_int32_t sa, ea, start, val, silent_page; s = (v->stereo ? 1 : 0) + (v->b16 ? 1 : 0); sa = v->start >> s; ea = v->end >> s; l = r = x = y = v->vol; if (v->stereo) { l = v->ismaster ? l : 0; r = v->ismaster ? 0 : r; } emu_wrptr(sc, v->vnum, EMU_CHAN_CPF, v->stereo ? EMU_CHAN_CPF_STEREO_MASK : 0); val = v->stereo ? 28 : 30; val *= v->b16 ? 1 : 2; start = sa + val; if (sc->audigy) { emu_wrptr(sc, v->vnum, EMU_A_CHAN_FXRT1, v->fxrt1); emu_wrptr(sc, v->vnum, EMU_A_CHAN_FXRT2, v->fxrt2); emu_wrptr(sc, v->vnum, EMU_A_CHAN_SENDAMOUNTS, 0); } else emu_wrptr(sc, v->vnum, EMU_CHAN_FXRT, v->fxrt1 << 16); emu_wrptr(sc, v->vnum, EMU_CHAN_PTRX, (x << 8) | r); emu_wrptr(sc, v->vnum, EMU_CHAN_DSL, ea | (y << 24)); emu_wrptr(sc, v->vnum, EMU_CHAN_PSST, sa | (l << 24)); emu_wrptr(sc, v->vnum, EMU_CHAN_CCCA, start | (v->b16 ? 0 : EMU_CHAN_CCCA_8BITSELECT)); emu_wrptr(sc, v->vnum, EMU_CHAN_Z1, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_Z2, 0); silent_page = ((u_int32_t)(sc->mem.silent_page_addr) << 1) | EMU_CHAN_MAP_PTI_MASK; emu_wrptr(sc, v->vnum, EMU_CHAN_MAPA, silent_page); emu_wrptr(sc, v->vnum, EMU_CHAN_MAPB, silent_page); emu_wrptr(sc, v->vnum, EMU_CHAN_CVCF, EMU_CHAN_CVCF_CURRFILTER_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_VTFT, EMU_CHAN_VTFT_FILTERTARGET_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_ATKHLDM, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_DCYSUSM, EMU_CHAN_DCYSUSM_DECAYTIME_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_LFOVAL1, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_LFOVAL2, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_FMMOD, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_TREMFRQ, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_FM2FRQ2, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_ENVVAL, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_ATKHLDV, EMU_CHAN_ATKHLDV_HOLDTIME_MASK | EMU_CHAN_ATKHLDV_ATTACKTIME_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_ENVVOL, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_PEFE_FILTERAMOUNT, 0x7f); emu_wrptr(sc, v->vnum, EMU_CHAN_PEFE_PITCHAMOUNT, 0); if (v->slave != NULL) emu_vwrite(sc, v->slave); } static void emu_vtrigger(struct sc_info *sc, struct emu_voice *v, int go) { u_int32_t pitch_target, initial_pitch; u_int32_t cra, cs, ccis; u_int32_t sample, i; if (go) { cra = 64; cs = v->stereo ? 4 : 2; ccis = v->stereo ? 28 : 30; ccis *= v->b16 ? 1 : 2; sample = v->b16 ? 
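/*
 * A note on the shift arithmetic in emu_vwrite() above (an
 * interpretation of the code, not an extra claim): v->start and
 * v->end are byte addresses, while PSST/DSL/CCCA take sample
 * addresses, hence the >> s with s = stereo + b16; for 16-bit stereo
 * s = 2, so byte offset 0x1000 becomes sample address 0x400.  For a
 * stereo pair the master voice keeps only the left volume and the
 * slave only the right one, which is why l or r is zeroed depending
 * on v->ismaster.
 */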
0x00000000 : 0x80808080; for (i = 0; i < cs; i++) emu_wrptr(sc, v->vnum, EMU_CHAN_CD0 + i, sample); emu_wrptr(sc, v->vnum, EMU_CHAN_CCR_CACHEINVALIDSIZE, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_CCR_READADDRESS, cra); emu_wrptr(sc, v->vnum, EMU_CHAN_CCR_CACHEINVALIDSIZE, ccis); emu_wrptr(sc, v->vnum, EMU_CHAN_IFATN, 0xff00); emu_wrptr(sc, v->vnum, EMU_CHAN_VTFT, 0xffffffff); emu_wrptr(sc, v->vnum, EMU_CHAN_CVCF, 0xffffffff); emu_wrptr(sc, v->vnum, EMU_CHAN_DCYSUSV, 0x00007f7f); emu_enastop(sc, v->vnum, 0); pitch_target = emu_rate_to_linearpitch(v->speed); initial_pitch = emu_rate_to_pitch(v->speed) >> 8; emu_wrptr(sc, v->vnum, EMU_CHAN_PTRX_PITCHTARGET, pitch_target); emu_wrptr(sc, v->vnum, EMU_CHAN_CPF_PITCH, pitch_target); emu_wrptr(sc, v->vnum, EMU_CHAN_IP, initial_pitch); } else { emu_wrptr(sc, v->vnum, EMU_CHAN_PTRX_PITCHTARGET, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_CPF_PITCH, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_IFATN, 0xffff); emu_wrptr(sc, v->vnum, EMU_CHAN_VTFT, 0x0000ffff); emu_wrptr(sc, v->vnum, EMU_CHAN_CVCF, 0x0000ffff); emu_wrptr(sc, v->vnum, EMU_CHAN_IP, 0); emu_enastop(sc, v->vnum, 1); } if (v->slave != NULL) emu_vtrigger(sc, v->slave, go); } static int emu_vpos(struct sc_info *sc, struct emu_voice *v) { int s, ptr; s = (v->b16 ? 1 : 0) + (v->stereo ? 1 : 0); ptr = (emu_rdptr(sc, v->vnum, EMU_CHAN_CCCA_CURRADDR) - (v->start >> s)) << s; return ptr & ~0x0000001f; } #ifdef EMUDEBUG static void emu_vdump(struct sc_info *sc, struct emu_voice *v) { char *regname[] = { "cpf", "ptrx", "cvcf", "vtft", "z2", "z1", "psst", "dsl", "ccca", "ccr", "clp", "fxrt", "mapa", "mapb", NULL, NULL, "envvol", "atkhldv", "dcysusv", "lfoval1", "envval", "atkhldm", "dcysusm", "lfoval2", "ip", "ifatn", "pefe", "fmmod", "tremfrq", "fmfrq2", "tempenv" }; char *regname2[] = { "mudata1", "mustat1", "mudata2", "mustat2", "fxwc1", "fxwc2", "spdrate", NULL, NULL, NULL, NULL, NULL, "fxrt2", "sndamnt", "fxrt1", NULL, NULL }; int i, x; printf("voice number %d\n", v->vnum); for (i = 0, x = 0; i <= 0x1e; i++) { if (regname[i] == NULL) continue; printf("%s\t[%08x]", regname[i], emu_rdptr(sc, v->vnum, i)); printf("%s", (x == 2) ? "\n" : "\t"); x++; if (x > 2) x = 0; } /* Print out audigy extra registers */ if (sc->audigy) { for (i = 0; i <= 0xe; i++) { if (regname2[i] == NULL) continue; printf("%s\t[%08x]", regname2[i], emu_rdptr(sc, v->vnum, i + 0x70)); printf("%s", (x == 2)? "\n" : "\t"); x++; if (x > 2) x = 0; } } printf("\n\n"); } #endif /* channel interface */ static void * emupchan_init(kobj_t obj, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir) { struct sc_info *sc = devinfo; struct sc_pchinfo *ch; void *r; KASSERT(dir == PCMDIR_PLAY, ("emupchan_init: bad direction")); ch = &sc->pch[sc->pnum++]; ch->buffer = b; ch->parent = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; snd_mtxlock(sc->lock); ch->master = emu_valloc(sc); ch->slave = emu_valloc(sc); snd_mtxunlock(sc->lock); r = (emu_vinit(sc, ch->master, ch->slave, sc->bufsz, ch->buffer)) ? 
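/*
 * emu_vpos() above is the inverse mapping: the current sample address
 * read back from EMU_CHAN_CCCA_CURRADDR is converted to a byte offset,
 * (CURRADDR - (start >> s)) << s, then masked down to a 32-byte
 * boundary - presumably the fetch granularity of the channel cache -
 * so the reported position never runs ahead of fetched data.
 */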
NULL : ch; return r; } static int emupchan_free(kobj_t obj, void *data) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; int r; snd_mtxlock(sc->lock); r = emu_memfree(sc, sndbuf_getbuf(ch->buffer)); snd_mtxunlock(sc->lock); return r; } static int emupchan_setformat(kobj_t obj, void *data, u_int32_t format) { struct sc_pchinfo *ch = data; ch->fmt = format; return 0; } static u_int32_t emupchan_setspeed(kobj_t obj, void *data, u_int32_t speed) { struct sc_pchinfo *ch = data; ch->spd = speed; return ch->spd; } static u_int32_t emupchan_setblocksize(kobj_t obj, void *data, u_int32_t blocksize) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; int irqrate, blksz; ch->blksz = blocksize; snd_mtxlock(sc->lock); emu_settimer(sc); irqrate = 48000 / sc->timerinterval; snd_mtxunlock(sc->lock); blksz = (ch->spd * sndbuf_getalign(ch->buffer)) / irqrate; return blocksize; } static int emupchan_trigger(kobj_t obj, void *data, int go) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; if (!PCMTRIG_COMMON(go)) return 0; snd_mtxlock(sc->lock); if (go == PCMTRIG_START) { emu_vsetup(ch); emu_vwrite(sc, ch->master); emu_settimer(sc); emu_enatimer(sc, 1); #ifdef EMUDEBUG printf("start [%d bit, %s, %d hz]\n", ch->master->b16 ? 16 : 8, ch->master->stereo ? "stereo" : "mono", ch->master->speed); emu_vdump(sc, ch->master); emu_vdump(sc, ch->slave); #endif } ch->run = (go == PCMTRIG_START) ? 1 : 0; emu_vtrigger(sc, ch->master, ch->run); snd_mtxunlock(sc->lock); return 0; } static u_int32_t emupchan_getptr(kobj_t obj, void *data) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; int r; snd_mtxlock(sc->lock); r = emu_vpos(sc, ch->master); snd_mtxunlock(sc->lock); return r; } static struct pcmchan_caps * emupchan_getcaps(kobj_t obj, void *data) { return &emu_playcaps; } static kobj_method_t emupchan_methods[] = { KOBJMETHOD(channel_init, emupchan_init), KOBJMETHOD(channel_free, emupchan_free), KOBJMETHOD(channel_setformat, emupchan_setformat), KOBJMETHOD(channel_setspeed, emupchan_setspeed), KOBJMETHOD(channel_setblocksize, emupchan_setblocksize), KOBJMETHOD(channel_trigger, emupchan_trigger), KOBJMETHOD(channel_getptr, emupchan_getptr), KOBJMETHOD(channel_getcaps, emupchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emupchan); /* channel interface */ static void * emurchan_init(kobj_t obj, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir) { struct sc_info *sc = devinfo; struct sc_rchinfo *ch; KASSERT(dir == PCMDIR_REC, ("emurchan_init: bad direction")); ch = &sc->rch[sc->rnum]; ch->buffer = b; ch->parent = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; ch->num = sc->rnum; switch(sc->rnum) { case 0: ch->idxreg = sc->audigy ? 
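/*
 * Worked example for emupchan_setblocksize() above (illustrative
 * numbers): with timerinterval = 1000 the interrupt rate is
 * 48000 / 1000 = 48 Hz, so a 48 kHz 16-bit stereo stream gives
 * blksz = (48000 * 4) / 48 = 4000 bytes per interrupt period.  Note
 * that the computed blksz is informational only; the function returns
 * the caller's blocksize unchanged.
 */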
EMU_A_ADCIDX : EMU_ADCIDX; ch->basereg = EMU_ADCBA; ch->sizereg = EMU_ADCBS; ch->setupreg = EMU_ADCCR; ch->irqmask = EMU_INTE_ADCBUFENABLE; break; case 1: ch->idxreg = EMU_FXIDX; ch->basereg = EMU_FXBA; ch->sizereg = EMU_FXBS; ch->setupreg = EMU_FXWC; ch->irqmask = EMU_INTE_EFXBUFENABLE; break; case 2: ch->idxreg = EMU_MICIDX; ch->basereg = EMU_MICBA; ch->sizereg = EMU_MICBS; ch->setupreg = 0; ch->irqmask = EMU_INTE_MICBUFENABLE; break; } sc->rnum++; if (sndbuf_alloc(ch->buffer, sc->parent_dmat, 0, sc->bufsz) != 0) return NULL; else { snd_mtxlock(sc->lock); emu_wrptr(sc, 0, ch->basereg, sndbuf_getbufaddr(ch->buffer)); emu_wrptr(sc, 0, ch->sizereg, 0); /* off */ snd_mtxunlock(sc->lock); return ch; } } static int emurchan_setformat(kobj_t obj, void *data, u_int32_t format) { struct sc_rchinfo *ch = data; ch->fmt = format; return 0; } static u_int32_t emurchan_setspeed(kobj_t obj, void *data, u_int32_t speed) { struct sc_rchinfo *ch = data; if (ch->num == 0) { if (ch->parent->audigy) speed = audigy_adcspeed[audigy_recval(speed)]; else speed = adcspeed[emu_recval(speed)]; } if (ch->num == 1) speed = 48000; if (ch->num == 2) speed = 8000; ch->spd = speed; return ch->spd; } static u_int32_t emurchan_setblocksize(kobj_t obj, void *data, u_int32_t blocksize) { struct sc_rchinfo *ch = data; struct sc_info *sc = ch->parent; int irqrate, blksz; ch->blksz = blocksize; snd_mtxlock(sc->lock); emu_settimer(sc); irqrate = 48000 / sc->timerinterval; snd_mtxunlock(sc->lock); blksz = (ch->spd * sndbuf_getalign(ch->buffer)) / irqrate; return blocksize; } /* semantic note: must start at beginning of buffer */ static int emurchan_trigger(kobj_t obj, void *data, int go) { struct sc_rchinfo *ch = data; struct sc_info *sc = ch->parent; u_int32_t val, sz; if (!PCMTRIG_COMMON(go)) return 0; switch(sc->bufsz) { case 4096: sz = EMU_RECBS_BUFSIZE_4096; break; case 8192: sz = EMU_RECBS_BUFSIZE_8192; break; case 16384: sz = EMU_RECBS_BUFSIZE_16384; break; case 32768: sz = EMU_RECBS_BUFSIZE_32768; break; case 65536: sz = EMU_RECBS_BUFSIZE_65536; break; default: sz = EMU_RECBS_BUFSIZE_4096; } snd_mtxlock(sc->lock); switch(go) { case PCMTRIG_START: ch->run = 1; emu_wrptr(sc, 0, ch->sizereg, sz); if (ch->num == 0) { if (sc->audigy) { val = EMU_A_ADCCR_LCHANENABLE; if (AFMT_CHANNEL(ch->fmt) > 1) val |= EMU_A_ADCCR_RCHANENABLE; val |= audigy_recval(ch->spd); } else { val = EMU_ADCCR_LCHANENABLE; if (AFMT_CHANNEL(ch->fmt) > 1) val |= EMU_ADCCR_RCHANENABLE; val |= emu_recval(ch->spd); } emu_wrptr(sc, 0, ch->setupreg, 0); emu_wrptr(sc, 0, ch->setupreg, val); } val = emu_rd(sc, EMU_INTE, 4); val |= ch->irqmask; emu_wr(sc, EMU_INTE, val, 4); break; case PCMTRIG_STOP: case PCMTRIG_ABORT: ch->run = 0; emu_wrptr(sc, 0, ch->sizereg, 0); if (ch->setupreg) emu_wrptr(sc, 0, ch->setupreg, 0); val = emu_rd(sc, EMU_INTE, 4); val &= ~ch->irqmask; emu_wr(sc, EMU_INTE, val, 4); break; case PCMTRIG_EMLDMAWR: case PCMTRIG_EMLDMARD: default: break; } snd_mtxunlock(sc->lock); return 0; } static u_int32_t emurchan_getptr(kobj_t obj, void *data) { struct sc_rchinfo *ch = data; struct sc_info *sc = ch->parent; int r; snd_mtxlock(sc->lock); r = emu_rdptr(sc, 0, ch->idxreg) & 0x0000ffff; snd_mtxunlock(sc->lock); return r; } static struct pcmchan_caps * emurchan_getcaps(kobj_t obj, void *data) { struct sc_rchinfo *ch = data; return &emu_reccaps[ch->num]; } static kobj_method_t emurchan_methods[] = { KOBJMETHOD(channel_init, emurchan_init), KOBJMETHOD(channel_setformat, emurchan_setformat), KOBJMETHOD(channel_setspeed, emurchan_setspeed), 
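/*
 * The buffer-size switch in emurchan_trigger() above accepts only the
 * discrete sizes the hardware encodes (4 KB through 64 KB in powers
 * of two, via EMU_RECBS_BUFSIZE_*) and quietly falls back to 4096 for
 * anything else.  emurchan_setspeed() likewise snaps requests to the
 * nearest supported ADC rate, e.g. a 23000 Hz request on the Live!
 * lands on 22050 Hz.
 */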
KOBJMETHOD(channel_setblocksize, emurchan_setblocksize), KOBJMETHOD(channel_trigger, emurchan_trigger), KOBJMETHOD(channel_getptr, emurchan_getptr), KOBJMETHOD(channel_getcaps, emurchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emurchan); static unsigned char emu_mread(struct mpu401 *arg, void *sc, int reg) { unsigned int d; d = emu_rd((struct sc_info *)sc, 0x18 + reg, 1); return d; } static void emu_mwrite(struct mpu401 *arg, void *sc, int reg, unsigned char b) { emu_wr((struct sc_info *)sc, 0x18 + reg, b, 1); } static int emu_muninit(struct mpu401 *arg, void *cookie) { struct sc_info *sc = cookie; snd_mtxlock(sc->lock); sc->mpu_intr = NULL; snd_mtxunlock(sc->lock); return 0; } static kobj_method_t emu_mpu_methods[] = { KOBJMETHOD(mpufoi_read, emu_mread), KOBJMETHOD(mpufoi_write, emu_mwrite), KOBJMETHOD(mpufoi_uninit, emu_muninit), KOBJMETHOD_END }; static DEFINE_CLASS(emu_mpu, emu_mpu_methods, 0); static void emu_intr2(void *p) { struct sc_info *sc = (struct sc_info *)p; if (sc->mpu_intr) (sc->mpu_intr)(sc->mpu); } static void emu_midiattach(struct sc_info *sc) { int i; i = emu_rd(sc, EMU_INTE, 4); i |= EMU_INTE_MIDIRXENABLE; emu_wr(sc, EMU_INTE, i, 4); sc->mpu = mpu401_init(&emu_mpu_class, sc, emu_intr2, &sc->mpu_intr); } /* -------------------------------------------------------------------- */ /* The interrupt handler */ static void emu_intr(void *data) { struct sc_info *sc = data; u_int32_t stat, ack, i, x; snd_mtxlock(sc->lock); while (1) { stat = emu_rd(sc, EMU_IPR, 4); if (stat == 0) break; ack = 0; /* process irq */ if (stat & EMU_IPR_INTERVALTIMER) ack |= EMU_IPR_INTERVALTIMER; if (stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL)) ack |= stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL); if (stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL)) ack |= stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL); if (stat & (EMU_IPR_MICBUFFULL | EMU_IPR_MICBUFHALFFULL)) ack |= stat & (EMU_IPR_MICBUFFULL | EMU_IPR_MICBUFHALFFULL); if (stat & EMU_PCIERROR) { ack |= EMU_PCIERROR; device_printf(sc->dev, "pci error\n"); /* we still get an nmi with ecc ram even if we ack this */ } if (stat & EMU_IPR_RATETRCHANGE) { ack |= EMU_IPR_RATETRCHANGE; #ifdef EMUDEBUG device_printf(sc->dev, "sample rate tracker lock status change\n"); #endif } - if (stat & EMU_IPR_MIDIRECVBUFE) - if (sc->mpu_intr) { - (sc->mpu_intr)(sc->mpu); - ack |= EMU_IPR_MIDIRECVBUFE | EMU_IPR_MIDITRANSBUFE; - } + if (stat & EMU_IPR_MIDIRECVBUFE) { + if (sc->mpu_intr) { + (sc->mpu_intr)(sc->mpu); + ack |= EMU_IPR_MIDIRECVBUFE | EMU_IPR_MIDITRANSBUFE; + } + } if (stat & ~ack) device_printf(sc->dev, "dodgy irq: %x (harmless)\n", stat & ~ack); emu_wr(sc, EMU_IPR, stat, 4); if (ack) { snd_mtxunlock(sc->lock); if (ack & EMU_IPR_INTERVALTIMER) { x = 0; for (i = 0; i < sc->nchans; i++) { if (sc->pch[i].run) { x = 1; chn_intr(sc->pch[i].channel); } } if (x == 0) emu_enatimer(sc, 0); } if (ack & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL)) { if (sc->rch[0].channel) chn_intr(sc->rch[0].channel); } if (ack & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL)) { if (sc->rch[1].channel) chn_intr(sc->rch[1].channel); } if (ack & (EMU_IPR_MICBUFFULL | EMU_IPR_MICBUFHALFFULL)) { if (sc->rch[2].channel) chn_intr(sc->rch[2].channel); } snd_mtxlock(sc->lock); } } snd_mtxunlock(sc->lock); } /* -------------------------------------------------------------------- */ static void emu_setmap(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *phys = arg; *phys = error ? 
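/*
 * Ack protocol in emu_intr() above: read EMU_IPR until it returns 0,
 * collect the causes we recognize into ack, write the whole status
 * word back to EMU_IPR to clear it, and only then - with the softc
 * lock dropped - fan chn_intr() out to every running playback channel
 * and to whichever record source (ADC, EFX, Mic) flagged a half- or
 * full-buffer condition.
 */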
0 : (bus_addr_t)segs->ds_addr; if (bootverbose) { printf("emu: setmap (%lx, %lx), nseg=%d, error=%d\n", (unsigned long)segs->ds_addr, (unsigned long)segs->ds_len, nseg, error); } } static void * emu_malloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr, bus_dmamap_t *map) { void *buf; *addr = 0; if (bus_dmamem_alloc(sc->parent_dmat, &buf, BUS_DMA_NOWAIT, map)) return NULL; if (bus_dmamap_load(sc->parent_dmat, *map, buf, sz, emu_setmap, addr, 0) || !*addr) { bus_dmamem_free(sc->parent_dmat, buf, *map); return NULL; } return buf; } static void emu_free(struct sc_info *sc, void *buf, bus_dmamap_t map) { bus_dmamap_unload(sc->parent_dmat, map); bus_dmamem_free(sc->parent_dmat, buf, map); } static void * emu_memalloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr) { u_int32_t blksz, start, idx, ofs, tmp, found; struct emu_mem *mem = &sc->mem; struct emu_memblk *blk; void *buf; blksz = sz / EMUPAGESIZE; if (sz > (blksz * EMUPAGESIZE)) blksz++; /* find a free block in the bitmap */ found = 0; start = 1; while (!found && start + blksz < EMUMAXPAGES) { found = 1; for (idx = start; idx < start + blksz; idx++) if (mem->bmap[idx >> 3] & (1 << (idx & 7))) found = 0; if (!found) start++; } if (!found) return NULL; blk = malloc(sizeof(*blk), M_DEVBUF, M_NOWAIT); if (blk == NULL) return NULL; buf = emu_malloc(sc, sz, &blk->buf_addr, &blk->buf_map); *addr = blk->buf_addr; if (buf == NULL) { free(blk, M_DEVBUF); return NULL; } blk->buf = buf; blk->pte_start = start; blk->pte_size = blksz; #ifdef EMUDEBUG printf("buf %p, pte_start %d, pte_size %d\n", blk->buf, blk->pte_start, blk->pte_size); #endif ofs = 0; for (idx = start; idx < start + blksz; idx++) { mem->bmap[idx >> 3] |= 1 << (idx & 7); tmp = (uint32_t)(blk->buf_addr + ofs); #ifdef EMUDEBUG printf("pte[%d] -> %x phys, %x virt\n", idx, tmp, ((u_int32_t)buf) + ofs); #endif mem->ptb_pages[idx] = (tmp << 1) | idx; ofs += EMUPAGESIZE; } SLIST_INSERT_HEAD(&mem->blocks, blk, link); return buf; } static int emu_memfree(struct sc_info *sc, void *buf) { u_int32_t idx, tmp; struct emu_mem *mem = &sc->mem; struct emu_memblk *blk, *i; blk = NULL; SLIST_FOREACH(i, &mem->blocks, link) { if (i->buf == buf) blk = i; } if (blk == NULL) return EINVAL; SLIST_REMOVE(&mem->blocks, blk, emu_memblk, link); emu_free(sc, buf, blk->buf_map); tmp = (u_int32_t)(sc->mem.silent_page_addr) << 1; for (idx = blk->pte_start; idx < blk->pte_start + blk->pte_size; idx++) { mem->bmap[idx >> 3] &= ~(1 << (idx & 7)); mem->ptb_pages[idx] = tmp | idx; } free(blk, M_DEVBUF); return 0; } static int emu_memstart(struct sc_info *sc, void *buf) { struct emu_mem *mem = &sc->mem; struct emu_memblk *blk, *i; blk = NULL; SLIST_FOREACH(i, &mem->blocks, link) { if (i->buf == buf) blk = i; } if (blk == NULL) return -EINVAL; return blk->pte_start; } static void emu_addefxop(struct sc_info *sc, int op, int z, int w, int x, int y, u_int32_t *pc) { emu_wrefx(sc, (*pc) * 2, (x << 10) | y); emu_wrefx(sc, (*pc) * 2 + 1, (op << 20) | (z << 10) | w); (*pc)++; } static void audigy_addefxop(struct sc_info *sc, int op, int z, int w, int x, int y, u_int32_t *pc) { emu_wrefx(sc, (*pc) * 2, (x << 12) | y); emu_wrefx(sc, (*pc) * 2 + 1, (op << 24) | (z << 12) | w); (*pc)++; } static void audigy_initefx(struct sc_info *sc) { int i; u_int32_t pc = 0; /* skip 0, 0, -1, 0 - NOPs */ for (i = 0; i < 512; i++) audigy_addefxop(sc, 0x0f, 0x0c0, 0x0c0, 0x0cf, 0x0c0, &pc); for (i = 0; i < 512; i++) emu_wrptr(sc, 0, EMU_A_FXGPREGBASE + i, 0x0); pc = 16; /* stop fx processor */ emu_wrptr(sc, 0, EMU_A_DBG, 
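/*
 * Instruction encoding used by the two helpers above, as implied by
 * the shifts in the code: each DSP instruction is a pair of 32-bit
 * words,
 *
 *     EMU10K1: word0 = (x << 10) | y,  word1 = (op << 20) | (z << 10) | w
 *     Audigy:  word0 = (x << 12) | y,  word1 = (op << 24) | (z << 12) | w
 *
 * i.e. 10-bit operand fields on the Live! and 12-bit fields on the
 * Audigy, which is why the two chips need separate helpers and the
 * separate register maps commented below.
 */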
EMU_A_DBG_SINGLE_STEP); /* Audigy 2 (EMU10K2) DSP Registers: FX Bus 0x000-0x00f : 16 registers (?) Input 0x040/0x041 : AC97 Codec (l/r) 0x042/0x043 : ADC, S/PDIF (l/r) 0x044/0x045 : Optical S/PDIF in (l/r) 0x046/0x047 : ? 0x048/0x049 : Line/Mic 2 (l/r) 0x04a/0x04b : RCA S/PDIF (l/r) 0x04c/0x04d : Aux 2 (l/r) Output 0x060/0x061 : Digital Front (l/r) 0x062/0x063 : Digital Center/LFE 0x064/0x065 : AudigyDrive Headphone (l/r) 0x066/0x067 : Digital Rear (l/r) 0x068/0x069 : Analog Front (l/r) 0x06a/0x06b : Analog Center/LFE 0x06c/0x06d : ? 0x06e/0x06f : Analog Rear (l/r) 0x070/0x071 : AC97 Output (l/r) 0x072/0x073 : ? 0x074/0x075 : ? 0x076/0x077 : ADC Recording Buffer (l/r) Constants 0x0c0 - 0x0c4 = 0 - 4 0x0c5 = 0x8, 0x0c6 = 0x10, 0x0c7 = 0x20 0x0c8 = 0x100, 0x0c9 = 0x10000, 0x0ca = 0x80000 0x0cb = 0x10000000, 0x0cc = 0x20000000, 0x0cd = 0x40000000 0x0ce = 0x80000000, 0x0cf = 0x7fffffff, 0x0d0 = 0xffffffff 0x0d1 = 0xfffffffe, 0x0d2 = 0xc0000000, 0x0d3 = 0x41fbbcdc 0x0d4 = 0x5a7ef9db, 0x0d5 = 0x00100000, 0x0dc = 0x00000001 (?) Temporary Values 0x0d6 : Accumulator (?) 0x0d7 : Condition Register 0x0d8 : Noise source 0x0d9 : Noise source Tank Memory Data Registers 0x200 - 0x2ff Tank Memory Address Registers 0x300 - 0x3ff General Purpose Registers 0x400 - 0x5ff */ /* AC97Output[l/r] = FXBus PCM[l/r] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AC97_L), A_C_00000000, A_C_00000000, A_FXBUS(FXBUS_PCM_LEFT), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AC97_R), A_C_00000000, A_C_00000000, A_FXBUS(FXBUS_PCM_RIGHT), &pc); /* GPR[0/1] = RCA S/PDIF[l/r] -- Master volume */ audigy_addefxop(sc, iACC3, A_GPR(0), A_C_00000000, A_C_00000000, A_EXTIN(EXTIN_COAX_SPDIF_L), &pc); audigy_addefxop(sc, iACC3, A_GPR(1), A_C_00000000, A_C_00000000, A_EXTIN(EXTIN_COAX_SPDIF_R), &pc); /* GPR[2] = GPR[0] (Left) / 2 + GPR[1] (Right) / 2 -- Central volume */ audigy_addefxop(sc, iINTERP, A_GPR(2), A_GPR(1), A_C_40000000, A_GPR(0), &pc); /* Headphones[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_HEADPHONE_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_HEADPHONE_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Analog Front[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AFRONT_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AFRONT_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Digital Front[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_FRONT_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_FRONT_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Center and Subwoofer configuration */ /* Analog Center = GPR[0] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ACENTER), A_C_00000000, A_GPR(0), A_GPR(2), &pc); /* Analog Sub = GPR[1] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ALFE), A_C_00000000, A_GPR(1), A_GPR(2), &pc); /* Digital Center = GPR[0] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_CENTER), A_C_00000000, A_GPR(0), A_GPR(2), &pc); /* Digital Sub = GPR[1] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_LFE), A_C_00000000, A_GPR(1), A_GPR(2), &pc); #if 0 /* Analog Rear[l/r] = (GPR[0/1] * RearVolume[l/r]) >> 31 */ /* RearVolume = GPR[0x10/0x11] (Will this ever be implemented?)
*/ audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_AREAR_L), A_C_00000000, A_GPR(16), A_GPR(0), &pc); audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_AREAR_R), A_C_00000000, A_GPR(17), A_GPR(1), &pc); /* Digital Rear[l/r] = (GPR[0/1] * RearVolume[l/r]) >> 31 */ /* RearVolume = GPR[0x10/0x11] (Will this ever be implemented?) */ audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_REAR_L), A_C_00000000, A_GPR(16), A_GPR(0), &pc); audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_REAR_R), A_C_00000000, A_GPR(17), A_GPR(1), &pc); #else /* XXX This is just a copy to the channel, since we do not have * a patch manager, it is useful to have another output enabled. */ /* Analog Rear[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AREAR_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AREAR_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Digital Rear[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_REAR_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_REAR_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); #endif /* ADC Recording buffer[l/r] = AC97Input[l/r] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ADC_CAP_L), A_C_00000000, A_C_00000000, A_EXTIN(A_EXTIN_AC97_L), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ADC_CAP_R), A_C_00000000, A_C_00000000, A_EXTIN(A_EXTIN_AC97_R), &pc); /* resume normal operations */ emu_wrptr(sc, 0, EMU_A_DBG, 0); } static void emu_initefx(struct sc_info *sc) { int i; u_int32_t pc = 16; /* acc3 0,0,0,0 - NOPs */ for (i = 0; i < 512; i++) { emu_wrefx(sc, i * 2, 0x10040); emu_wrefx(sc, i * 2 + 1, 0x610040); } for (i = 0; i < 256; i++) emu_wrptr(sc, 0, EMU_FXGPREGBASE + i, 0); /* FX-8010 DSP Registers: FX Bus 0x000-0x00f : 16 registers Input 0x010/0x011 : AC97 Codec (l/r) 0x012/0x013 : ADC, S/PDIF (l/r) 0x014/0x015 : Mic(left), Zoom (l/r) 0x016/0x017 : TOS link in (l/r) 0x018/0x019 : Line/Mic 1 (l/r) 0x01a/0x01b : COAX S/PDIF (l/r) 0x01c/0x01d : Line/Mic 2 (l/r) Output 0x020/0x021 : AC97 Output (l/r) 0x022/0x023 : TOS link out (l/r) 0x024/0x025 : Center/LFE 0x026/0x027 : LiveDrive Headphone (l/r) 0x028/0x029 : Rear Channel (l/r) 0x02a/0x02b : ADC Recording Buffer (l/r) 0x02c : Mic Recording Buffer 0x031/0x032 : Analog Center/LFE Constants 0x040 - 0x044 = 0 - 4 0x045 = 0x8, 0x046 = 0x10, 0x047 = 0x20 0x048 = 0x100, 0x049 = 0x10000, 0x04a = 0x80000 0x04b = 0x10000000, 0x04c = 0x20000000, 0x04d = 0x40000000 0x04e = 0x80000000, 0x04f = 0x7fffffff, 0x050 = 0xffffffff 0x051 = 0xfffffffe, 0x052 = 0xc0000000, 0x053 = 0x41fbbcdc 0x054 = 0x5a7ef9db, 0x055 = 0x00100000 Temporary Values 0x056 : Accumulator 0x057 : Condition Register 0x058 : Noise source 0x059 : Noise source 0x05a : IRQ Register 0x05b : TRAM Delay Base Address Count General Purpose Registers 0x100 - 0x1ff Tank Memory Data Registers 0x200 - 0x2ff Tank Memory Address Registers 0x300 - 0x3ff */ /* Routing - this will be configurable in a later version */ /* GPR[0/1] = FX * 4 + SPDIF-in */ emu_addefxop(sc, iMACINT0, GPR(0), EXTIN(EXTIN_SPDIF_CD_L), FXBUS(FXBUS_PCM_LEFT), C_00000004, &pc); emu_addefxop(sc, iMACINT0, GPR(1), EXTIN(EXTIN_SPDIF_CD_R), FXBUS(FXBUS_PCM_RIGHT), C_00000004, &pc); /* GPR[0/1] += APS-input */ emu_addefxop(sc, iACC3, GPR(0), GPR(0), C_00000000, sc->APS ? EXTIN(EXTIN_TOSLINK_L) : C_00000000, &pc); emu_addefxop(sc, iACC3, GPR(1), GPR(1), C_00000000, sc->APS ?
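/*
 * Opcode semantics for the routing code in this function and in
 * audigy_initefx() above, as commonly documented for the FX8010 DSP
 * (paraphrased here, not part of the original source):
 *
 *     iACC3:    R = A + X + Y
 *     iMAC0:    R = A + (X * Y) >> 31        (fractional multiply)
 *     iMACINT0: R = A + X * Y                (integer multiply)
 *     iINTERP:  R = A + (X * (Y - A)) >> 31  (linear interpolation)
 *
 * So the "Central volume" op, iINTERP with A = GPR(1),
 * X = C_40000000 (0.5) and Y = GPR(0), computes
 * GPR(1) + 0.5 * (GPR(0) - GPR(1)) = (GPR(0) + GPR(1)) / 2.
 */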
EXTIN(EXTIN_TOSLINK_R) : C_00000000, &pc); /* FrontOut (AC97) = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_R), C_00000000, C_00000001, GPR(1), &pc); /* GPR[2] = GPR[0] (Left) / 2 + GPR[1] (Right) / 2 -- Central volume */ emu_addefxop(sc, iINTERP, GPR(2), GPR(1), C_40000000, GPR(0), &pc); #if 0 /* RearOut = (GPR[0/1] * RearVolume) >> 31 */ /* RearVolume = GPR[0x10/0x11] */ emu_addefxop(sc, iMAC0, EXTOUT(EXTOUT_REAR_L), C_00000000, GPR(16), GPR(0), &pc); emu_addefxop(sc, iMAC0, EXTOUT(EXTOUT_REAR_R), C_00000000, GPR(17), GPR(1), &pc); #else /* XXX This is just a copy to the channel, since we do not have * a patch manager, it is useful to have another output enabled. */ /* Rear[l/r] = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_REAR_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_REAR_R), C_00000000, C_00000000, GPR(1), &pc); #endif /* TOS out[l/r] = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_TOSLINK_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_TOSLINK_R), C_00000000, C_00000000, GPR(1), &pc); /* Center and Subwoofer configuration */ /* Analog Center = GPR[0] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ACENTER), C_00000000, GPR(0), GPR(2), &pc); /* Analog Sub = GPR[1] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ALFE), C_00000000, GPR(1), GPR(2), &pc); /* Digital Center = GPR[0] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_CENTER), C_00000000, GPR(0), GPR(2), &pc); /* Digital Sub = GPR[1] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_LFE), C_00000000, GPR(1), GPR(2), &pc); /* Headphones[l/r] = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_HEADPHONE_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_HEADPHONE_R), C_00000000, C_00000000, GPR(1), &pc); /* ADC Recording buffer[l/r] = AC97Input[l/r] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ADC_CAP_L), C_00000000, C_00000000, EXTIN(EXTIN_AC97_L), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ADC_CAP_R), C_00000000, C_00000000, EXTIN(EXTIN_AC97_R), &pc); /* resume normal operations */ emu_wrptr(sc, 0, EMU_DBG, 0); } /* Probe and attach the card */ static int emu_init(struct sc_info *sc) { u_int32_t spcs, ch, tmp, i; if (sc->audigy) { /* enable additional AC97 slots */ emu_wrptr(sc, 0, EMU_AC97SLOT, EMU_AC97SLOT_CENTER | EMU_AC97SLOT_LFE); } /* disable audio and lock cache */ emu_wr(sc, EMU_HCFG, EMU_HCFG_LOCKSOUNDCACHE | EMU_HCFG_LOCKTANKCACHE_MASK | EMU_HCFG_MUTEBUTTONENABLE, 4); /* reset recording buffers */ emu_wrptr(sc, 0, EMU_MICBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_MICBA, 0); emu_wrptr(sc, 0, EMU_FXBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_FXBA, 0); emu_wrptr(sc, 0, EMU_ADCBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_ADCBA, 0); /* disable channel interrupt */ emu_wr(sc, EMU_INTE, EMU_INTE_INTERTIMERENB | EMU_INTE_SAMPLERATER | EMU_INTE_PCIERRENABLE, 4); emu_wrptr(sc, 0, EMU_CLIEL, 0); emu_wrptr(sc, 0, EMU_CLIEH, 0); emu_wrptr(sc, 0, EMU_SOLEL, 0); emu_wrptr(sc, 0, EMU_SOLEH, 0); /* wonder what these do... 
*/ if (sc->audigy) { emu_wrptr(sc, 0, EMU_SPBYPASS, 0xf00); emu_wrptr(sc, 0, EMU_AC97SLOT, 0x3); } /* init envelope engine */ for (ch = 0; ch < NUM_G; ch++) { emu_wrptr(sc, ch, EMU_CHAN_DCYSUSV, ENV_OFF); emu_wrptr(sc, ch, EMU_CHAN_IP, 0); emu_wrptr(sc, ch, EMU_CHAN_VTFT, 0xffff); emu_wrptr(sc, ch, EMU_CHAN_CVCF, 0xffff); emu_wrptr(sc, ch, EMU_CHAN_PTRX, 0); emu_wrptr(sc, ch, EMU_CHAN_CPF, 0); emu_wrptr(sc, ch, EMU_CHAN_CCR, 0); emu_wrptr(sc, ch, EMU_CHAN_PSST, 0); emu_wrptr(sc, ch, EMU_CHAN_DSL, 0x10); emu_wrptr(sc, ch, EMU_CHAN_CCCA, 0); emu_wrptr(sc, ch, EMU_CHAN_Z1, 0); emu_wrptr(sc, ch, EMU_CHAN_Z2, 0); emu_wrptr(sc, ch, EMU_CHAN_FXRT, 0xd01c0000); emu_wrptr(sc, ch, EMU_CHAN_ATKHLDM, 0); emu_wrptr(sc, ch, EMU_CHAN_DCYSUSM, 0); emu_wrptr(sc, ch, EMU_CHAN_IFATN, 0xffff); emu_wrptr(sc, ch, EMU_CHAN_PEFE, 0); emu_wrptr(sc, ch, EMU_CHAN_FMMOD, 0); emu_wrptr(sc, ch, EMU_CHAN_TREMFRQ, 24); /* 1 Hz */ emu_wrptr(sc, ch, EMU_CHAN_FM2FRQ2, 24); /* 1 Hz */ emu_wrptr(sc, ch, EMU_CHAN_TEMPENV, 0); /*** these are last so OFF prevents writing ***/ emu_wrptr(sc, ch, EMU_CHAN_LFOVAL2, 0); emu_wrptr(sc, ch, EMU_CHAN_LFOVAL1, 0); emu_wrptr(sc, ch, EMU_CHAN_ATKHLDV, 0); emu_wrptr(sc, ch, EMU_CHAN_ENVVOL, 0); emu_wrptr(sc, ch, EMU_CHAN_ENVVAL, 0); if (sc->audigy) { /* audigy cards need this to initialize correctly */ emu_wrptr(sc, ch, 0x4c, 0); emu_wrptr(sc, ch, 0x4d, 0); emu_wrptr(sc, ch, 0x4e, 0); emu_wrptr(sc, ch, 0x4f, 0); /* set default routing */ emu_wrptr(sc, ch, EMU_A_CHAN_FXRT1, 0x03020100); emu_wrptr(sc, ch, EMU_A_CHAN_FXRT2, 0x3f3f3f3f); emu_wrptr(sc, ch, EMU_A_CHAN_SENDAMOUNTS, 0); } sc->voice[ch].vnum = ch; sc->voice[ch].slave = NULL; sc->voice[ch].busy = 0; sc->voice[ch].ismaster = 0; sc->voice[ch].running = 0; sc->voice[ch].b16 = 0; sc->voice[ch].stereo = 0; sc->voice[ch].speed = 0; sc->voice[ch].start = 0; sc->voice[ch].end = 0; sc->voice[ch].channel = NULL; } sc->pnum = sc->rnum = 0; /* * Init to 0x02109204 : * Clock accuracy = 0 (1000ppm) * Sample Rate = 2 (48kHz) * Audio Channel = 1 (Left of 2) * Source Number = 0 (Unspecified) * Generation Status = 1 (Original for Cat Code 12) * Cat Code = 12 (Digital Signal Mixer) * Mode = 0 (Mode 0) * Emphasis = 0 (None) * CP = 1 (Copyright unasserted) * AN = 0 (Audio data) * P = 0 (Consumer) */ spcs = EMU_SPCS_CLKACCY_1000PPM | EMU_SPCS_SAMPLERATE_48 | EMU_SPCS_CHANNELNUM_LEFT | EMU_SPCS_SOURCENUM_UNSPEC | EMU_SPCS_GENERATIONSTATUS | 0x00001200 | 0x00000000 | EMU_SPCS_EMPHASIS_NONE | EMU_SPCS_COPYRIGHT; emu_wrptr(sc, 0, EMU_SPCS0, spcs); emu_wrptr(sc, 0, EMU_SPCS1, spcs); emu_wrptr(sc, 0, EMU_SPCS2, spcs); if (!sc->audigy) emu_initefx(sc); else if (sc->audigy2) { /* Audigy 2 */ /* from ALSA initialization code: */ /* Hack for Alice3 to work independent of haP16V driver */ u_int32_t tmp; /* Setup SRCMulti_I2S SamplingRate */ tmp = emu_rdptr(sc, 0, EMU_A_SPDIF_SAMPLERATE) & 0xfffff1ff; emu_wrptr(sc, 0, EMU_A_SPDIF_SAMPLERATE, tmp | 0x400); /* Setup SRCSel (Enable SPDIF, I2S SRCMulti) */ emu_wr(sc, 0x20, 0x00600000, 4); emu_wr(sc, 0x24, 0x00000014, 4); /* Setup SRCMulti Input Audio Enable */ emu_wr(sc, 0x20, 0x006e0000, 4); emu_wr(sc, 0x24, 0xff00ff00, 4); } SLIST_INIT(&sc->mem.blocks); sc->mem.ptb_pages = emu_malloc(sc, EMUMAXPAGES * sizeof(u_int32_t), &sc->mem.ptb_pages_addr, &sc->mem.ptb_map); if (sc->mem.ptb_pages == NULL) return -1; sc->mem.silent_page = emu_malloc(sc, EMUPAGESIZE, &sc->mem.silent_page_addr, &sc->mem.silent_map); if (sc->mem.silent_page == NULL) { emu_free(sc, sc->mem.ptb_pages, sc->mem.ptb_map); return -1; } /* Clear page with 
silence & setup all pointers to this page */ bzero(sc->mem.silent_page, EMUPAGESIZE); tmp = (u_int32_t)(sc->mem.silent_page_addr) << 1; for (i = 0; i < EMUMAXPAGES; i++) sc->mem.ptb_pages[i] = tmp | i; emu_wrptr(sc, 0, EMU_PTB, (sc->mem.ptb_pages_addr)); emu_wrptr(sc, 0, EMU_TCB, 0); /* taken from original driver */ emu_wrptr(sc, 0, EMU_TCBS, 0); /* taken from original driver */ for (ch = 0; ch < NUM_G; ch++) { emu_wrptr(sc, ch, EMU_CHAN_MAPA, tmp | EMU_CHAN_MAP_PTI_MASK); emu_wrptr(sc, ch, EMU_CHAN_MAPB, tmp | EMU_CHAN_MAP_PTI_MASK); } /* emu_memalloc(sc, EMUPAGESIZE); */ /* * Hokay, now enable the AUD bit * * Audigy * Enable Audio = 0 (enabled after fx processor initialization) * Mute Disable Audio = 0 * Joystick = 1 * * Audigy 2 * Enable Audio = 1 * Mute Disable Audio = 0 * Joystick = 1 * GP S/PDIF AC3 Enable = 1 * CD S/PDIF AC3 Enable = 1 * * EMU10K1 * Enable Audio = 1 * Mute Disable Audio = 0 * Lock Tank Memory = 1 * Lock Sound Memory = 0 * Auto Mute = 1 */ if (sc->audigy) { tmp = EMU_HCFG_AUTOMUTE | EMU_HCFG_JOYENABLE; if (sc->audigy2) /* Audigy 2 */ tmp = EMU_HCFG_AUDIOENABLE | EMU_HCFG_AC3ENABLE_CDSPDIF | EMU_HCFG_AC3ENABLE_GPSPDIF; emu_wr(sc, EMU_HCFG, tmp, 4); audigy_initefx(sc); /* from ALSA initialization code: */ /* enable audio and disable both audio/digital outputs */ emu_wr(sc, EMU_HCFG, emu_rd(sc, EMU_HCFG, 4) | EMU_HCFG_AUDIOENABLE, 4); emu_wr(sc, EMU_A_IOCFG, emu_rd(sc, EMU_A_IOCFG, 4) & ~EMU_A_IOCFG_GPOUT_AD, 4); if (sc->audigy2) { /* Audigy 2 */ /* Unmute Analog. * Set GPO6 to 1 for Apollo. This has to be done after * init Alice3 I2SOut beyond 48kHz. * So, sequence is important. */ emu_wr(sc, EMU_A_IOCFG, emu_rd(sc, EMU_A_IOCFG, 4) | EMU_A_IOCFG_GPOUT_A, 4); } } else { /* EMU10K1 initialization code */ tmp = EMU_HCFG_AUDIOENABLE | EMU_HCFG_LOCKTANKCACHE_MASK | EMU_HCFG_AUTOMUTE; if (sc->rev >= 6) tmp |= EMU_HCFG_JOYENABLE; emu_wr(sc, EMU_HCFG, tmp, 4); /* TOSLink detection */ sc->tos_link = 0; tmp = emu_rd(sc, EMU_HCFG, 4); if (tmp & (EMU_HCFG_GPINPUT0 | EMU_HCFG_GPINPUT1)) { emu_wr(sc, EMU_HCFG, tmp | EMU_HCFG_GPOUT1, 4); DELAY(50); if (tmp != (emu_rd(sc, EMU_HCFG, 4) & ~EMU_HCFG_GPOUT1)) { sc->tos_link = 1; emu_wr(sc, EMU_HCFG, tmp, 4); } } } return 0; } static int emu_uninit(struct sc_info *sc) { u_int32_t ch; emu_wr(sc, EMU_INTE, 0, 4); for (ch = 0; ch < NUM_G; ch++) emu_wrptr(sc, ch, EMU_CHAN_DCYSUSV, ENV_OFF); for (ch = 0; ch < NUM_G; ch++) { emu_wrptr(sc, ch, EMU_CHAN_VTFT, 0); emu_wrptr(sc, ch, EMU_CHAN_CVCF, 0); emu_wrptr(sc, ch, EMU_CHAN_PTRX, 0); emu_wrptr(sc, ch, EMU_CHAN_CPF, 0); } if (sc->audigy) { /* stop fx processor */ emu_wrptr(sc, 0, EMU_A_DBG, EMU_A_DBG_SINGLE_STEP); } /* disable audio and lock cache */ emu_wr(sc, EMU_HCFG, EMU_HCFG_LOCKSOUNDCACHE | EMU_HCFG_LOCKTANKCACHE_MASK | EMU_HCFG_MUTEBUTTONENABLE, 4); emu_wrptr(sc, 0, EMU_PTB, 0); /* reset recording buffers */ emu_wrptr(sc, 0, EMU_MICBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_MICBA, 0); emu_wrptr(sc, 0, EMU_FXBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_FXBA, 0); emu_wrptr(sc, 0, EMU_FXWC, 0); emu_wrptr(sc, 0, EMU_ADCBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_ADCBA, 0); emu_wrptr(sc, 0, EMU_TCB, 0); emu_wrptr(sc, 0, EMU_TCBS, 0); /* disable channel interrupt */ emu_wrptr(sc, 0, EMU_CLIEL, 0); emu_wrptr(sc, 0, EMU_CLIEH, 0); emu_wrptr(sc, 0, EMU_SOLEL, 0); emu_wrptr(sc, 0, EMU_SOLEH, 0); /* init envelope engine */ if (!SLIST_EMPTY(&sc->mem.blocks)) device_printf(sc->dev, "warning: memblock list not empty\n"); emu_free(sc, sc->mem.ptb_pages, sc->mem.ptb_map); emu_free(sc, 
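/*
 * Page-table recap for the memory code above (a summary of existing
 * behaviour): each PTE is (physical address << 1) | page index, so a
 * 32-bit entry leaves only 31 usable address bits, which lines up
 * with the 2 GB lowaddr limit on the DMA tag created in
 * emu_pci_attach() below.  Every unused PTE points at the single
 * silent page, so a voice running past its buffer fetches silence
 * rather than random memory.
 */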
sc->mem.silent_page, sc->mem.silent_map); if(sc->mpu) mpu401_uninit(sc->mpu); return 0; } static int emu_pci_probe(device_t dev) { char *s = NULL; switch (pci_get_devid(dev)) { case EMU10K1_PCI_ID: s = "Creative EMU10K1"; break; case EMU10K2_PCI_ID: if (pci_get_revid(dev) == 0x04) s = "Creative Audigy 2 (EMU10K2)"; else s = "Creative Audigy (EMU10K2)"; break; case EMU10K3_PCI_ID: s = "Creative Audigy 2 (EMU10K3)"; break; default: return ENXIO; } device_set_desc(dev, s); return BUS_PROBE_LOW_PRIORITY; } static int emu_pci_attach(device_t dev) { struct ac97_info *codec = NULL; struct sc_info *sc; int i, gotmic; char status[SND_STATUSLEN]; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); sc->lock = snd_mtxcreate(device_get_nameunit(dev), "snd_emu10k1 softc"); sc->dev = dev; sc->type = pci_get_devid(dev); sc->rev = pci_get_revid(dev); sc->audigy = sc->type == EMU10K2_PCI_ID || sc->type == EMU10K3_PCI_ID; sc->audigy2 = (sc->audigy && sc->rev == 0x04); sc->nchans = sc->audigy ? 8 : 4; sc->addrmask = sc->audigy ? EMU_A_PTR_ADDR_MASK : EMU_PTR_ADDR_MASK; pci_enable_busmaster(dev); i = PCIR_BAR(0); sc->reg = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &i, RF_ACTIVE); if (sc->reg == NULL) { device_printf(dev, "unable to map register space\n"); goto bad; } sc->st = rman_get_bustag(sc->reg); sc->sh = rman_get_bushandle(sc->reg); sc->bufsz = pcm_getbuffersize(dev, 4096, EMU_DEFAULT_BUFSZ, 65536); if (bus_dma_tag_create(/*parent*/bus_get_dma_tag(dev), /*alignment*/2, /*boundary*/0, /*lowaddr*/(1U << 31) - 1, /* can only access 0-2gb */ /*highaddr*/BUS_SPACE_MAXADDR, /*filter*/NULL, /*filterarg*/NULL, /*maxsize*/sc->bufsz, /*nsegments*/1, /*maxsegz*/0x3ffff, /*flags*/0, /*lockfunc*/busdma_lock_mutex, /*lockarg*/&Giant, &sc->parent_dmat) != 0) { device_printf(dev, "unable to create dma tag\n"); goto bad; } if (emu_init(sc) == -1) { device_printf(dev, "unable to initialize the card\n"); goto bad; } codec = AC97_CREATE(dev, sc, emu_ac97); if (codec == NULL) goto bad; gotmic = (ac97_getcaps(codec) & AC97_CAP_MICCHANNEL) ? 1 : 0; if (mixer_init(dev, ac97_getmixerclass(), codec) == -1) goto bad; emu_midiattach(sc); i = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, RF_ACTIVE | RF_SHAREABLE); if (!sc->irq || snd_setup_intr(dev, sc->irq, INTR_MPSAFE, emu_intr, sc, &sc->ih)) { device_printf(dev, "unable to map interrupt\n"); goto bad; } snprintf(status, SND_STATUSLEN, "at io 0x%jx irq %jd %s", rman_get_start(sc->reg), rman_get_start(sc->irq), PCM_KLDSTRING(snd_emu10k1)); if (pcm_register(dev, sc, sc->nchans, gotmic ? 3 : 2)) goto bad; for (i = 0; i < sc->nchans; i++) pcm_addchan(dev, PCMDIR_PLAY, &emupchan_class, sc); for (i = 0; i < (gotmic ? 
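/*
 * Channel bookkeeping in the attach path above: sc->nchans is 8 on
 * Audigy and 4 on Live!, and pcm_register() is told gotmic ? 3 : 2
 * record channels - ADC and EFX always, plus the dedicated Mic buffer
 * only when the AC97 codec advertises AC97_CAP_MICCHANNEL.
 */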
3 : 2); i++) pcm_addchan(dev, PCMDIR_REC, &emurchan_class, sc); pcm_setstatus(dev, status); return 0; bad: if (codec) ac97_destroy(codec); if (sc->reg) bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), sc->reg); if (sc->ih) bus_teardown_intr(dev, sc->irq, sc->ih); if (sc->irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq); if (sc->parent_dmat) bus_dma_tag_destroy(sc->parent_dmat); if (sc->lock) snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return ENXIO; } static int emu_pci_detach(device_t dev) { int r; struct sc_info *sc; r = pcm_unregister(dev); if (r) return r; sc = pcm_getdevinfo(dev); /* shutdown chip */ emu_uninit(sc); bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), sc->reg); bus_teardown_intr(dev, sc->irq, sc->ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq); bus_dma_tag_destroy(sc->parent_dmat); snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return 0; } /* add suspend, resume */ static device_method_t emu_methods[] = { /* Device interface */ DEVMETHOD(device_probe, emu_pci_probe), DEVMETHOD(device_attach, emu_pci_attach), DEVMETHOD(device_detach, emu_pci_detach), DEVMETHOD_END }; static driver_t emu_driver = { "pcm", emu_methods, PCM_SOFTC_SIZE, }; DRIVER_MODULE(snd_emu10k1, pci, emu_driver, pcm_devclass, NULL, NULL); MODULE_DEPEND(snd_emu10k1, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(snd_emu10k1, 1); MODULE_DEPEND(snd_emu10k1, midi, 1, 1, 1); /* dummy driver to silence the joystick device */ static int emujoy_pci_probe(device_t dev) { char *s = NULL; switch (pci_get_devid(dev)) { case 0x70021102: s = "Creative EMU10K1 Joystick"; device_quiet(dev); break; case 0x70031102: s = "Creative EMU10K2 Joystick"; device_quiet(dev); break; } if (s) device_set_desc(dev, s); return s ? -1000 : ENXIO; } static int emujoy_pci_attach(device_t dev) { return 0; } static int emujoy_pci_detach(device_t dev) { return 0; } static device_method_t emujoy_methods[] = { DEVMETHOD(device_probe, emujoy_pci_probe), DEVMETHOD(device_attach, emujoy_pci_attach), DEVMETHOD(device_detach, emujoy_pci_detach), DEVMETHOD_END }; static driver_t emujoy_driver = { "emujoy", emujoy_methods, 1 /* no softc */ }; static devclass_t emujoy_devclass; DRIVER_MODULE(emujoy, pci, emujoy_driver, emujoy_devclass, NULL, NULL); Index: projects/clang1000-import/sys/dev/sound/pci/emu10kx-pcm.c =================================================================== --- projects/clang1000-import/sys/dev/sound/pci/emu10kx-pcm.c (revision 357178) +++ projects/clang1000-import/sys/dev/sound/pci/emu10kx-pcm.c (revision 357179) @@ -1,1540 +1,1541 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Cameron Grant * Copyright (c) 2003-2007 Yuriy Tsibizov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_snd.h" #endif #include #include #include #include "mixer_if.h" #include #include struct emu_pcm_pchinfo { int spd; int fmt; unsigned int blksz; int run; struct emu_voice *master; struct emu_voice *slave; struct snd_dbuf *buffer; struct pcm_channel *channel; struct emu_pcm_info *pcm; int timer; }; struct emu_pcm_rchinfo { int spd; int fmt; unsigned int blksz; int run; uint32_t idxreg; uint32_t basereg; uint32_t sizereg; uint32_t setupreg; uint32_t irqmask; uint32_t iprmask; int ihandle; struct snd_dbuf *buffer; struct pcm_channel *channel; struct emu_pcm_info *pcm; int timer; }; /* XXX Hardware playback channels */ #define MAX_CHANNELS 4 #if MAX_CHANNELS > 13 #error Too many hardware channels defined. 13 is the maximum #endif struct emu_pcm_info { struct mtx *lock; device_t dev; /* device information */ struct emu_sc_info *card; struct emu_pcm_pchinfo pch[MAX_CHANNELS]; /* hardware channels */ int pnum; /* next free channel number */ struct emu_pcm_rchinfo rch_adc; struct emu_pcm_rchinfo rch_efx; struct emu_route rt; struct emu_route rt_mono; int route; int ihandle; /* interrupt handler */ unsigned int bufsz; int is_emu10k1; struct ac97_info *codec; uint32_t ac97_state[0x7F]; kobj_class_t ac97_mixerclass; uint32_t ac97_recdevs; uint32_t ac97_playdevs; struct snd_mixer *sm; int mch_disabled; unsigned int emu10k1_volcache[2][2]; }; static uint32_t emu_rfmt_adc[] = { SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static struct pcmchan_caps emu_reccaps_adc = { 8000, 48000, emu_rfmt_adc, 0 }; static uint32_t emu_rfmt_efx[] = { SND_FORMAT(AFMT_S16_LE, 1, 0), 0 }; static struct pcmchan_caps emu_reccaps_efx_live = { 48000*32, 48000*32, emu_rfmt_efx, 0 }; static struct pcmchan_caps emu_reccaps_efx_audigy = { 48000*64, 48000*64, emu_rfmt_efx, 0 }; static int emu_rates_live[] = { 48000*32 }; static int emu_rates_audigy[] = { 48000*64 }; static uint32_t emu_pfmt[] = { SND_FORMAT(AFMT_U8, 1, 0), SND_FORMAT(AFMT_U8, 2, 0), SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static uint32_t emu_pfmt_mono[] = { SND_FORMAT(AFMT_U8, 1, 0), SND_FORMAT(AFMT_S16_LE, 1, 0), 0 }; static struct pcmchan_caps emu_playcaps = {4000, 48000, emu_pfmt, 0}; static struct pcmchan_caps emu_playcaps_mono = {4000, 48000, emu_pfmt_mono, 0}; static int emu10k1_adcspeed[8] = {48000, 44100, 32000, 24000, 22050, 16000, 11025, 8000}; /* audigy supports 12kHz. 
*/ static int emu10k2_adcspeed[9] = {48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000}; static uint32_t emu_pcm_intr(void *pcm, uint32_t stat); static const struct emu_dspmix_props_k1 { uint8_t present; uint8_t recdev; int8_t input; } dspmix_k1 [SOUND_MIXER_NRDEVICES] = { /* no mixer device for ac97 */ /* in0 AC97 */ [SOUND_MIXER_DIGITAL1] = {1, 1, 1}, /* in1 CD SPDIF */ /* not connected */ /* in2 (zoom) */ [SOUND_MIXER_DIGITAL2] = {1, 1, 3}, /* in3 toslink */ [SOUND_MIXER_LINE2] = {1, 1, 4}, /* in4 Line-In2 */ [SOUND_MIXER_DIGITAL3] = {1, 1, 5}, /* in5 on-card SPDIF */ [SOUND_MIXER_LINE3] = {1, 1, 6}, /* in6 AUX2 */ /* not connected */ /* in7 */ }; static const struct emu_dspmix_props_k2 { uint8_t present; uint8_t recdev; int8_t input; } dspmix_k2 [SOUND_MIXER_NRDEVICES] = { [SOUND_MIXER_VOLUME] = {1, 0, (-1)}, [SOUND_MIXER_PCM] = {1, 0, (-1)}, /* no mixer device */ /* in0 AC97 */ [SOUND_MIXER_DIGITAL1] = {1, 1, 1}, /* in1 CD SPDIF */ [SOUND_MIXER_DIGITAL2] = {1, 1, 2}, /* in2 COAX SPDIF */ /* not connected */ /* in3 */ [SOUND_MIXER_LINE2] = {1, 1, 4}, /* in4 Line-In2 */ [SOUND_MIXER_DIGITAL3] = {1, 1, 5}, /* in5 on-card SPDIF */ [SOUND_MIXER_LINE3] = {1, 1, 6}, /* in6 AUX2 */ /* not connected */ /* in7 */ }; static int emu_dspmixer_init(struct snd_mixer *m) { struct emu_pcm_info *sc; int i; int p, r; p = 0; r = 0; sc = mix_getdevinfo(m); if (sc->route == RT_FRONT) { /* create submixer for AC97 codec */ if ((sc->ac97_mixerclass != NULL) && (sc->codec != NULL)) { sc->sm = mixer_create(sc->dev, sc->ac97_mixerclass, sc->codec, "ac97"); if (sc->sm != NULL) { p = mix_getdevs(sc->sm); r = mix_getrecdevs(sc->sm); } } sc->ac97_playdevs = p; sc->ac97_recdevs = r; } /* These two are always here */ p |= (1 << SOUND_MIXER_PCM); p |= (1 << SOUND_MIXER_VOLUME); if (sc->route == RT_FRONT) { if (sc->is_emu10k1) { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k1[i].present) p |= (1 << i); if (dspmix_k1[i].recdev) r |= (1 << i); } } else { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k2[i].present) p |= (1 << i); if (dspmix_k2[i].recdev) r |= (1 << i); } } } mix_setdevs(m, p); mix_setrecdevs(m, r); return (0); } static int emu_dspmixer_uninit(struct snd_mixer *m) { struct emu_pcm_info *sc; int err = 0; /* drop submixer for AC97 codec */ sc = mix_getdevinfo(m); - if (sc->sm != NULL) + if (sc->sm != NULL) { err = mixer_delete(sc->sm); if (err) return (err); sc->sm = NULL; + } return (0); } static int emu_dspmixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct emu_pcm_info *sc; sc = mix_getdevinfo(m); switch (dev) { case SOUND_MIXER_VOLUME: switch (sc->route) { case RT_FRONT: if (sc->sm != NULL) mix_set(sc->sm, dev, left, right); if (sc->mch_disabled) { /* In the emu10k1 case PCM volume does not affect sound routed to rear & center/sub (it is connected to AC97 codec). Calculate it manually. 
*/ /* This really should belong to emu10kx.c */ if (sc->is_emu10k1) { sc->emu10k1_volcache[0][0] = left; left = left * sc->emu10k1_volcache[1][0] / 100; sc->emu10k1_volcache[0][1] = right; right = right * sc->emu10k1_volcache[1][1] / 100; } emumix_set_volume(sc->card, M_MASTER_REAR_L, left); emumix_set_volume(sc->card, M_MASTER_REAR_R, right); if (!sc->is_emu10k1) { emumix_set_volume(sc->card, M_MASTER_CENTER, (left+right)/2); emumix_set_volume(sc->card, M_MASTER_SUBWOOFER, (left+right)/2); /* XXX side */ } } /* mch disabled */ break; case RT_REAR: emumix_set_volume(sc->card, M_MASTER_REAR_L, left); emumix_set_volume(sc->card, M_MASTER_REAR_R, right); break; case RT_CENTER: emumix_set_volume(sc->card, M_MASTER_CENTER, (left+right)/2); break; case RT_SUB: emumix_set_volume(sc->card, M_MASTER_SUBWOOFER, (left+right)/2); break; } break; case SOUND_MIXER_PCM: switch (sc->route) { case RT_FRONT: if (sc->sm != NULL) mix_set(sc->sm, dev, left, right); if (sc->mch_disabled) { /* See SOUND_MIXER_VOLUME case */ if (sc->is_emu10k1) { sc->emu10k1_volcache[1][0] = left; left = left * sc->emu10k1_volcache[0][0] / 100; sc->emu10k1_volcache[1][1] = right; right = right * sc->emu10k1_volcache[0][1] / 100; } emumix_set_volume(sc->card, M_MASTER_REAR_L, left); emumix_set_volume(sc->card, M_MASTER_REAR_R, right); if (!sc->is_emu10k1) { emumix_set_volume(sc->card, M_MASTER_CENTER, (left+right)/2); emumix_set_volume(sc->card, M_MASTER_SUBWOOFER, (left+right)/2); /* XXX side */ } } /* mch_disabled */ break; case RT_REAR: emumix_set_volume(sc->card, M_FX2_REAR_L, left); emumix_set_volume(sc->card, M_FX3_REAR_R, right); break; case RT_CENTER: emumix_set_volume(sc->card, M_FX4_CENTER, (left+right)/2); break; case RT_SUB: emumix_set_volume(sc->card, M_FX5_SUBWOOFER, (left+right)/2); break; } break; case SOUND_MIXER_DIGITAL1: /* CD SPDIF, in1 */ emumix_set_volume(sc->card, M_IN1_FRONT_L, left); emumix_set_volume(sc->card, M_IN1_FRONT_R, right); break; case SOUND_MIXER_DIGITAL2: if (sc->is_emu10k1) { /* TOSLink, in3 */ emumix_set_volume(sc->card, M_IN3_FRONT_L, left); emumix_set_volume(sc->card, M_IN3_FRONT_R, right); } else { /* COAX SPDIF, in2 */ emumix_set_volume(sc->card, M_IN2_FRONT_L, left); emumix_set_volume(sc->card, M_IN2_FRONT_R, right); } break; case SOUND_MIXER_LINE2: /* Line-In2, in4 */ emumix_set_volume(sc->card, M_IN4_FRONT_L, left); emumix_set_volume(sc->card, M_IN4_FRONT_R, right); break; case SOUND_MIXER_DIGITAL3: /* on-card SPDIF, in5 */ emumix_set_volume(sc->card, M_IN5_FRONT_L, left); emumix_set_volume(sc->card, M_IN5_FRONT_R, right); break; case SOUND_MIXER_LINE3: /* AUX2, in6 */ emumix_set_volume(sc->card, M_IN6_FRONT_L, left); emumix_set_volume(sc->card, M_IN6_FRONT_R, right); break; default: if (sc->sm != NULL) { /* XXX emumix_set_volume is not required here */ emumix_set_volume(sc->card, M_IN0_FRONT_L, 100); emumix_set_volume(sc->card, M_IN0_FRONT_R, 100); mix_set(sc->sm, dev, left, right); } else device_printf(sc->dev, "mixer error: unknown device %d\n", dev); } return (0); } static u_int32_t emu_dspmixer_setrecsrc(struct snd_mixer *m, u_int32_t src) { struct emu_pcm_info *sc; int i; u_int32_t recmask; int input[8]; sc = mix_getdevinfo(m); recmask = 0; for (i=0; i < 8; i++) input[i]=0; if (sc->sm != NULL) if ((src & sc->ac97_recdevs) !=0) if (mix_setrecsrc(sc->sm, src & sc->ac97_recdevs) == 0) { recmask |= (src & sc->ac97_recdevs); /* Recording from AC97 codec. 
Enable AC97 route to rec on DSP */ input[0] = 1; } if (sc->is_emu10k1) { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k1[i].recdev) if ((src & (1 << i)) == ((uint32_t)1 << i)) { recmask |= (1 << i); /* enable device i */ input[dspmix_k1[i].input] = 1; } } } else { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k2[i].recdev) if ((src & (1 << i)) == ((uint32_t)1 << i)) { recmask |= (1 << i); /* enable device i */ input[dspmix_k2[i].input] = 1; } } } emumix_set_volume(sc->card, M_IN0_REC_L, input[0] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN0_REC_R, input[0] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN1_REC_L, input[1] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN1_REC_R, input[1] == 1 ? 100 : 0); if (!sc->is_emu10k1) { emumix_set_volume(sc->card, M_IN2_REC_L, input[2] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN2_REC_R, input[2] == 1 ? 100 : 0); } if (sc->is_emu10k1) { emumix_set_volume(sc->card, M_IN3_REC_L, input[3] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN3_REC_R, input[3] == 1 ? 100 : 0); } emumix_set_volume(sc->card, M_IN4_REC_L, input[4] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN4_REC_R, input[4] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN5_REC_L, input[5] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN5_REC_R, input[5] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN6_REC_L, input[6] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN6_REC_R, input[6] == 1 ? 100 : 0); /* XXX check for K1/k2 differences? */ if ((src & (1 << SOUND_MIXER_PCM)) == (1 << SOUND_MIXER_PCM)) { emumix_set_volume(sc->card, M_FX0_REC_L, emumix_get_volume(sc->card, M_FX0_FRONT_L)); emumix_set_volume(sc->card, M_FX1_REC_R, emumix_get_volume(sc->card, M_FX1_FRONT_R)); } else { emumix_set_volume(sc->card, M_FX0_REC_L, 0); emumix_set_volume(sc->card, M_FX1_REC_R, 0); } return (recmask); } static kobj_method_t emudspmixer_methods[] = { KOBJMETHOD(mixer_init, emu_dspmixer_init), KOBJMETHOD(mixer_uninit, emu_dspmixer_uninit), KOBJMETHOD(mixer_set, emu_dspmixer_set), KOBJMETHOD(mixer_setrecsrc, emu_dspmixer_setrecsrc), KOBJMETHOD_END }; MIXER_DECLARE(emudspmixer); static int emu_efxmixer_init(struct snd_mixer *m) { mix_setdevs(m, SOUND_MASK_VOLUME); mix_setrecdevs(m, SOUND_MASK_MONITOR); return (0); } static int emu_efxmixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { if (left + right == 200) return (0); return (0); } static u_int32_t emu_efxmixer_setrecsrc(struct snd_mixer *m __unused, u_int32_t src __unused) { return (SOUND_MASK_MONITOR); } static kobj_method_t emuefxmixer_methods[] = { KOBJMETHOD(mixer_init, emu_efxmixer_init), KOBJMETHOD(mixer_set, emu_efxmixer_set), KOBJMETHOD(mixer_setrecsrc, emu_efxmixer_setrecsrc), KOBJMETHOD_END }; MIXER_DECLARE(emuefxmixer); /* * AC97 emulation code for Audigy and later cards. * Some parts of AC97 codec are not used by hardware, but can be used * to change some DSP controls via AC97 mixer interface. This includes: * - master volume controls MASTER_FRONT_[R|L] * - pcm volume controls FX[0|1]_FRONT_[R|L] * - rec volume controls MASTER_REC_[R|L] * We do it because we need to put it under user control.... 
* We also keep some parts of AC97 disabled to get better sound quality */ #define AC97LEFT(x) ((x & 0x7F00)>>8) #define AC97RIGHT(x) (x & 0x007F) #define AC97MUTE(x) ((x & 0x8000)>>15) #define BIT4_TO100(x) (100-(x)*100/(0x0f)) #define BIT6_TO100(x) (100-(x)*100/(0x3f)) #define BIT4_TO255(x) (255-(x)*255/(0x0f)) #define BIT6_TO255(x) (255-(x)*255/(0x3f)) #define V100_TOBIT6(x) (0x3f*(100-x)/100) #define V100_TOBIT4(x) (0x0f*(100-x)/100) #define AC97ENCODE(x_muted, x_left, x_right) (((x_muted & 1)<<15) | ((x_left & 0x3f)<<8) | (x_right & 0x3f)) static int emu_ac97_read_emulation(struct emu_pcm_info *sc, int regno) { int use_ac97; int emulated; int tmp; use_ac97 = 1; emulated = 0; switch (regno) { case AC97_MIX_MASTER: emulated = sc->ac97_state[AC97_MIX_MASTER]; use_ac97 = 0; break; case AC97_MIX_PCM: emulated = sc->ac97_state[AC97_MIX_PCM]; use_ac97 = 0; break; case AC97_REG_RECSEL: emulated = 0x0505; use_ac97 = 0; break; case AC97_MIX_RGAIN: emulated = sc->ac97_state[AC97_MIX_RGAIN]; use_ac97 = 0; break; } emu_wr(sc->card, EMU_AC97ADDR, regno, 1); tmp = emu_rd(sc->card, EMU_AC97DATA, 2); if (use_ac97) emulated = tmp; return (emulated); } static void emu_ac97_write_emulation(struct emu_pcm_info *sc, int regno, uint32_t data) { int write_ac97; int left, right; uint32_t emu_left, emu_right; int is_mute; write_ac97 = 1; left = AC97LEFT(data); emu_left = BIT6_TO100(left); /* We present ourselves as a 6-bit AC97 mixer */ right = AC97RIGHT(data); emu_right = BIT6_TO100(right); is_mute = AC97MUTE(data); if (is_mute) emu_left = emu_right = 0; switch (regno) { /* TODO: reset emulator on AC97_RESET */ case AC97_MIX_MASTER: emumix_set_volume(sc->card, M_MASTER_FRONT_L, emu_left); emumix_set_volume(sc->card, M_MASTER_FRONT_R, emu_right); sc->ac97_state[AC97_MIX_MASTER] = data & (0x8000 | 0x3f3f); data = 0x8000; /* Mute AC97 main out */ break; case AC97_MIX_PCM: /* PCM OUT VOL */ emumix_set_volume(sc->card, M_FX0_FRONT_L, emu_left); emumix_set_volume(sc->card, M_FX1_FRONT_R, emu_right); sc->ac97_state[AC97_MIX_PCM] = data & (0x8000 | 0x3f3f); data = 0x8000; /* Mute AC97 PCM out */ break; case AC97_REG_RECSEL: /* * PCM recording source is set to "stereo mix" (labeled "vol" * in mixer). There is no 'playback' from AC97 codec - * if you want to hear anything from AC97 you have to _record_ * it. Keep things simple and record "stereo mix". */ data = 0x0505; break; case AC97_MIX_RGAIN: /* RECORD GAIN */ emu_left = BIT4_TO100(left); /* rgain is 4-bit */ emu_right = BIT4_TO100(right); emumix_set_volume(sc->card, M_MASTER_REC_L, 100-emu_left); emumix_set_volume(sc->card, M_MASTER_REC_R, 100-emu_right); /* * Record gain on AC97 should stay zero to get AC97 sound on * AC97_[RL] connectors on EMU10K2 chip. 
AC97 on Audigy is not * directly connected to any output, only to the EMU10K2 chip. Use * this control to set the AC97 mix volume inside the EMU10K2 chip. */ sc->ac97_state[AC97_MIX_RGAIN] = data & (0x8000 | 0x0f0f); data = 0x0000; break; } if (write_ac97) { emu_wr(sc->card, EMU_AC97ADDR, regno, 1); emu_wr(sc->card, EMU_AC97DATA, data, 2); } } static int emu_erdcd(kobj_t obj __unused, void *devinfo, int regno) { struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; return (emu_ac97_read_emulation(sc, regno)); } static int emu_ewrcd(kobj_t obj __unused, void *devinfo, int regno, uint32_t data) { struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; emu_ac97_write_emulation(sc, regno, data); return (0); } static kobj_method_t emu_eac97_methods[] = { KOBJMETHOD(ac97_read, emu_erdcd), KOBJMETHOD(ac97_write, emu_ewrcd), KOBJMETHOD_END }; AC97_DECLARE(emu_eac97); /* real ac97 codec */ static int emu_rdcd(kobj_t obj __unused, void *devinfo, int regno) { int rd; struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; KASSERT(sc->card != NULL, ("emu_rdcd: no soundcard")); emu_wr(sc->card, EMU_AC97ADDR, regno, 1); rd = emu_rd(sc->card, EMU_AC97DATA, 2); return (rd); } static int emu_wrcd(kobj_t obj __unused, void *devinfo, int regno, uint32_t data) { struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; KASSERT(sc->card != NULL, ("emu_wrcd: no soundcard")); emu_wr(sc->card, EMU_AC97ADDR, regno, 1); emu_wr(sc->card, EMU_AC97DATA, data, 2); return (0); } static kobj_method_t emu_ac97_methods[] = { KOBJMETHOD(ac97_read, emu_rdcd), KOBJMETHOD(ac97_write, emu_wrcd), KOBJMETHOD_END }; AC97_DECLARE(emu_ac97); static int emu_k1_recval(int speed) { int val; val = 0; while ((val < 7) && (speed < emu10k1_adcspeed[val])) val++; return (val); } static int emu_k2_recval(int speed) { int val; val = 0; while ((val < 8) && (speed < emu10k2_adcspeed[val])) val++; return (val); } static void * emupchan_init(kobj_t obj __unused, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir __unused) { struct emu_pcm_info *sc = devinfo; struct emu_pcm_pchinfo *ch; void *r; KASSERT(dir == PCMDIR_PLAY, ("emupchan_init: bad direction")); KASSERT(sc->card != NULL, ("emupchan_init: no soundcard")); if (sc->pnum >= MAX_CHANNELS) return (NULL); ch = &(sc->pch[sc->pnum++]); ch->buffer = b; ch->pcm = sc; ch->channel = c; ch->blksz = sc->bufsz; ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; ch->master = emu_valloc(sc->card); /* * XXX we have to allocate slave even for mono channel until we * fix emu_vfree to handle this case. */ ch->slave = emu_valloc(sc->card); ch->timer = emu_timer_create(sc->card); r = (emu_vinit(sc->card, ch->master, ch->slave, EMU_PLAY_BUFSZ, ch->buffer)) ?
NULL : ch; return (r); } static int emupchan_free(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; emu_timer_clear(sc->card, ch->timer); if (ch->slave != NULL) emu_vfree(sc->card, ch->slave); emu_vfree(sc->card, ch->master); return (0); } static int emupchan_setformat(kobj_t obj __unused, void *c_devinfo, uint32_t format) { struct emu_pcm_pchinfo *ch = c_devinfo; ch->fmt = format; return (0); } static uint32_t emupchan_setspeed(kobj_t obj __unused, void *c_devinfo, uint32_t speed) { struct emu_pcm_pchinfo *ch = c_devinfo; ch->spd = speed; return (ch->spd); } static uint32_t emupchan_setblocksize(kobj_t obj __unused, void *c_devinfo, uint32_t blocksize) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (blocksize > ch->pcm->bufsz) blocksize = ch->pcm->bufsz; snd_mtxlock(sc->lock); ch->blksz = blocksize; emu_timer_set(sc->card, ch->timer, ch->blksz / sndbuf_getalign(ch->buffer)); snd_mtxunlock(sc->lock); return (ch->blksz); } static int emupchan_trigger(kobj_t obj __unused, void *c_devinfo, int go) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (!PCMTRIG_COMMON(go)) return (0); snd_mtxlock(sc->lock); /* XXX can we trigger on parallel threads ? */ if (go == PCMTRIG_START) { emu_vsetup(ch->master, ch->fmt, ch->spd); if (AFMT_CHANNEL(ch->fmt) > 1) emu_vroute(sc->card, &(sc->rt), ch->master); else emu_vroute(sc->card, &(sc->rt_mono), ch->master); emu_vwrite(sc->card, ch->master); emu_timer_set(sc->card, ch->timer, ch->blksz / sndbuf_getalign(ch->buffer)); emu_timer_enable(sc->card, ch->timer, 1); } /* PCM interrupt handler will handle PCMTRIG_STOP event */ ch->run = (go == PCMTRIG_START) ? 1 : 0; emu_vtrigger(sc->card, ch->master, ch->run); snd_mtxunlock(sc->lock); return (0); } static uint32_t emupchan_getptr(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; int r; r = emu_vpos(sc->card, ch->master); return (r); } static struct pcmchan_caps * emupchan_getcaps(kobj_t obj __unused, void *c_devinfo __unused) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; switch (sc->route) { case RT_FRONT: /* FALLTHROUGH */ case RT_REAR: /* FALLTHROUGH */ case RT_SIDE: return (&emu_playcaps); break; case RT_CENTER: /* FALLTHROUGH */ case RT_SUB: return (&emu_playcaps_mono); break; } return (NULL); } static kobj_method_t emupchan_methods[] = { KOBJMETHOD(channel_init, emupchan_init), KOBJMETHOD(channel_free, emupchan_free), KOBJMETHOD(channel_setformat, emupchan_setformat), KOBJMETHOD(channel_setspeed, emupchan_setspeed), KOBJMETHOD(channel_setblocksize, emupchan_setblocksize), KOBJMETHOD(channel_trigger, emupchan_trigger), KOBJMETHOD(channel_getptr, emupchan_getptr), KOBJMETHOD(channel_getcaps, emupchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emupchan); static void * emurchan_init(kobj_t obj __unused, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir __unused) { struct emu_pcm_info *sc = devinfo; struct emu_pcm_rchinfo *ch; KASSERT(dir == PCMDIR_REC, ("emurchan_init: bad direction")); ch = &sc->rch_adc; ch->buffer = b; ch->pcm = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; /* We rise interrupt for half-full buffer */ ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; ch->idxreg = sc->is_emu10k1 ? 
EMU_ADCIDX : EMU_A_ADCIDX; ch->basereg = EMU_ADCBA; ch->sizereg = EMU_ADCBS; ch->setupreg = EMU_ADCCR; ch->irqmask = EMU_INTE_ADCBUFENABLE; ch->iprmask = EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL; if (sndbuf_alloc(ch->buffer, emu_gettag(sc->card), 0, sc->bufsz) != 0) return (NULL); else { ch->timer = emu_timer_create(sc->card); emu_wrptr(sc->card, 0, ch->basereg, sndbuf_getbufaddr(ch->buffer)); emu_wrptr(sc->card, 0, ch->sizereg, 0); /* off */ return (ch); } } static int emurchan_free(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; emu_timer_clear(sc->card, ch->timer); return (0); } static int emurchan_setformat(kobj_t obj __unused, void *c_devinfo, uint32_t format) { struct emu_pcm_rchinfo *ch = c_devinfo; ch->fmt = format; return (0); } static uint32_t emurchan_setspeed(kobj_t obj __unused, void *c_devinfo, uint32_t speed) { struct emu_pcm_rchinfo *ch = c_devinfo; if (ch->pcm->is_emu10k1) { speed = emu10k1_adcspeed[emu_k1_recval(speed)]; } else { speed = emu10k2_adcspeed[emu_k2_recval(speed)]; } ch->spd = speed; return (ch->spd); } static uint32_t emurchan_setblocksize(kobj_t obj __unused, void *c_devinfo, uint32_t blocksize) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; ch->blksz = blocksize; /* * If blocksize is less than half of buffer size we will not get * BUFHALFFULL interrupt in time and channel will need to generate * (and use) timer interrupts. Otherwise channel will be marked dead. */ if (ch->blksz < (ch->pcm->bufsz / 2)) { emu_timer_set(sc->card, ch->timer, ch->blksz / sndbuf_getalign(ch->buffer)); emu_timer_enable(sc->card, ch->timer, 1); } else { emu_timer_enable(sc->card, ch->timer, 0); } return (ch->blksz); } static int emurchan_trigger(kobj_t obj __unused, void *c_devinfo, int go) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; uint32_t val, sz; if (!PCMTRIG_COMMON(go)) return (0); switch (sc->bufsz) { case 4096: sz = EMU_RECBS_BUFSIZE_4096; break; case 8192: sz = EMU_RECBS_BUFSIZE_8192; break; case 16384: sz = EMU_RECBS_BUFSIZE_16384; break; case 32768: sz = EMU_RECBS_BUFSIZE_32768; break; case 65536: sz = EMU_RECBS_BUFSIZE_65536; break; default: sz = EMU_RECBS_BUFSIZE_4096; } snd_mtxlock(sc->lock); switch (go) { case PCMTRIG_START: ch->run = 1; emu_wrptr(sc->card, 0, ch->sizereg, sz); val = sc->is_emu10k1 ? EMU_ADCCR_LCHANENABLE : EMU_A_ADCCR_LCHANENABLE; if (AFMT_CHANNEL(ch->fmt) > 1) val |= sc->is_emu10k1 ? EMU_ADCCR_RCHANENABLE : EMU_A_ADCCR_RCHANENABLE; val |= sc->is_emu10k1 ? 
emu_k1_recval(ch->spd) : emu_k2_recval(ch->spd); emu_wrptr(sc->card, 0, ch->setupreg, 0); emu_wrptr(sc->card, 0, ch->setupreg, val); ch->ihandle = emu_intr_register(sc->card, ch->irqmask, ch->iprmask, &emu_pcm_intr, sc); break; case PCMTRIG_STOP: /* FALLTHROUGH */ case PCMTRIG_ABORT: ch->run = 0; emu_wrptr(sc->card, 0, ch->sizereg, 0); if (ch->setupreg) emu_wrptr(sc->card, 0, ch->setupreg, 0); (void)emu_intr_unregister(sc->card, ch->ihandle); break; case PCMTRIG_EMLDMAWR: /* FALLTHROUGH */ case PCMTRIG_EMLDMARD: /* FALLTHROUGH */ default: break; } snd_mtxunlock(sc->lock); return (0); } static uint32_t emurchan_getptr(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; int r; r = emu_rdptr(sc->card, 0, ch->idxreg) & 0x0000ffff; return (r); } static struct pcmchan_caps * emurchan_getcaps(kobj_t obj __unused, void *c_devinfo __unused) { return (&emu_reccaps_adc); } static kobj_method_t emurchan_methods[] = { KOBJMETHOD(channel_init, emurchan_init), KOBJMETHOD(channel_free, emurchan_free), KOBJMETHOD(channel_setformat, emurchan_setformat), KOBJMETHOD(channel_setspeed, emurchan_setspeed), KOBJMETHOD(channel_setblocksize, emurchan_setblocksize), KOBJMETHOD(channel_trigger, emurchan_trigger), KOBJMETHOD(channel_getptr, emurchan_getptr), KOBJMETHOD(channel_getcaps, emurchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emurchan); static void * emufxrchan_init(kobj_t obj __unused, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir __unused) { struct emu_pcm_info *sc = devinfo; struct emu_pcm_rchinfo *ch; KASSERT(dir == PCMDIR_REC, ("emurchan_init: bad direction")); if (sc == NULL) return (NULL); ch = &(sc->rch_efx); ch->fmt = SND_FORMAT(AFMT_S16_LE, 1, 0); ch->spd = sc->is_emu10k1 ? 48000*32 : 48000 * 64; ch->idxreg = EMU_FXIDX; ch->basereg = EMU_FXBA; ch->sizereg = EMU_FXBS; ch->irqmask = EMU_INTE_EFXBUFENABLE; ch->iprmask = EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL; ch->buffer = b; ch->pcm = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; if (sndbuf_alloc(ch->buffer, emu_gettag(sc->card), 0, sc->bufsz) != 0) return (NULL); else { emu_wrptr(sc->card, 0, ch->basereg, sndbuf_getbufaddr(ch->buffer)); emu_wrptr(sc->card, 0, ch->sizereg, 0); /* off */ return (ch); } } static int emufxrchan_setformat(kobj_t obj __unused, void *c_devinfo __unused, uint32_t format) { if (format == SND_FORMAT(AFMT_S16_LE, 1, 0)) return (0); return (EINVAL); } static uint32_t emufxrchan_setspeed(kobj_t obj __unused, void *c_devinfo, uint32_t speed) { struct emu_pcm_rchinfo *ch = c_devinfo; /* FIXED RATE CHANNEL */ return (ch->spd); } static uint32_t emufxrchan_setblocksize(kobj_t obj __unused, void *c_devinfo, uint32_t blocksize) { struct emu_pcm_rchinfo *ch = c_devinfo; ch->blksz = blocksize; /* * XXX If blocksize is less than half of buffer size we will not get * interrupt in time and channel will die due to interrupt timeout. * This should not happen with FX rchan, because it will fill buffer * very fast (64K buffer is 0.021seconds on Audigy). 
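 *
 * (Editor's arithmetic, added for illustration: the EFX stream is
 * 16-bit samples for every mono channel at 48 kHz, so with all
 * channels enabled:
 *	SB Live!: 32 ch * 2 bytes * 48000 Hz ~= 3.1 MB/s,
 *	          64 KiB fills in ~0.021 s;
 *	Audigy:   64 ch * 2 bytes * 48000 Hz ~= 6.1 MB/s,
 *	          64 KiB fills in ~0.011 s.
 * The 0.021 s figure quoted above thus matches the 32-channel
 * SB Live! case.)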
*/ if (ch->blksz < (ch->pcm->bufsz / 2)) ch->blksz = ch->pcm->bufsz / 2; return (ch->blksz); } static int emufxrchan_trigger(kobj_t obj __unused, void *c_devinfo, int go) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; uint32_t sz; if (!PCMTRIG_COMMON(go)) return (0); switch (sc->bufsz) { case 4096: sz = EMU_RECBS_BUFSIZE_4096; break; case 8192: sz = EMU_RECBS_BUFSIZE_8192; break; case 16384: sz = EMU_RECBS_BUFSIZE_16384; break; case 32768: sz = EMU_RECBS_BUFSIZE_32768; break; case 65536: sz = EMU_RECBS_BUFSIZE_65536; break; default: sz = EMU_RECBS_BUFSIZE_4096; } snd_mtxlock(sc->lock); switch (go) { case PCMTRIG_START: ch->run = 1; emu_wrptr(sc->card, 0, ch->sizereg, sz); ch->ihandle = emu_intr_register(sc->card, ch->irqmask, ch->iprmask, &emu_pcm_intr, sc); /* * SB Live! is limited to 32 mono channels. Audigy * has 64 mono channels. Channels are enabled * by setting a bit in EMU_A_FXWC[1|2] registers. */ /* XXX there is no way to demultiplex this streams for now */ if (sc->is_emu10k1) { emu_wrptr(sc->card, 0, EMU_FXWC, 0xffffffff); } else { emu_wrptr(sc->card, 0, EMU_A_FXWC1, 0xffffffff); emu_wrptr(sc->card, 0, EMU_A_FXWC2, 0xffffffff); } break; case PCMTRIG_STOP: /* FALLTHROUGH */ case PCMTRIG_ABORT: ch->run = 0; if (sc->is_emu10k1) { emu_wrptr(sc->card, 0, EMU_FXWC, 0x0); } else { emu_wrptr(sc->card, 0, EMU_A_FXWC1, 0x0); emu_wrptr(sc->card, 0, EMU_A_FXWC2, 0x0); } emu_wrptr(sc->card, 0, ch->sizereg, 0); (void)emu_intr_unregister(sc->card, ch->ihandle); break; case PCMTRIG_EMLDMAWR: /* FALLTHROUGH */ case PCMTRIG_EMLDMARD: /* FALLTHROUGH */ default: break; } snd_mtxunlock(sc->lock); return (0); } static uint32_t emufxrchan_getptr(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; int r; r = emu_rdptr(sc->card, 0, ch->idxreg) & 0x0000ffff; return (r); } static struct pcmchan_caps * emufxrchan_getcaps(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (sc->is_emu10k1) return (&emu_reccaps_efx_live); return (&emu_reccaps_efx_audigy); } static int emufxrchan_getrates(kobj_t obj __unused, void *c_devinfo, int **rates) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (sc->is_emu10k1) *rates = emu_rates_live; else *rates = emu_rates_audigy; return 1; } static kobj_method_t emufxrchan_methods[] = { KOBJMETHOD(channel_init, emufxrchan_init), KOBJMETHOD(channel_setformat, emufxrchan_setformat), KOBJMETHOD(channel_setspeed, emufxrchan_setspeed), KOBJMETHOD(channel_setblocksize, emufxrchan_setblocksize), KOBJMETHOD(channel_trigger, emufxrchan_trigger), KOBJMETHOD(channel_getptr, emufxrchan_getptr), KOBJMETHOD(channel_getcaps, emufxrchan_getcaps), KOBJMETHOD(channel_getrates, emufxrchan_getrates), KOBJMETHOD_END }; CHANNEL_DECLARE(emufxrchan); static uint32_t emu_pcm_intr(void *pcm, uint32_t stat) { struct emu_pcm_info *sc = (struct emu_pcm_info *)pcm; uint32_t ack; int i; ack = 0; snd_mtxlock(sc->lock); if (stat & EMU_IPR_INTERVALTIMER) { ack |= EMU_IPR_INTERVALTIMER; for (i = 0; i < MAX_CHANNELS; i++) if (sc->pch[i].channel) { if (sc->pch[i].run == 1) { snd_mtxunlock(sc->lock); chn_intr(sc->pch[i].channel); snd_mtxlock(sc->lock); } else emu_timer_enable(sc->card, sc->pch[i].timer, 0); } /* ADC may install timer to get low-latency interrupts */ if ((sc->rch_adc.channel) && (sc->rch_adc.run)) { snd_mtxunlock(sc->lock); chn_intr(sc->rch_adc.channel); snd_mtxlock(sc->lock); } /* * EFX does not use timer, because 
it will fill * buffer at least 32x times faster than ADC. */ } if (stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL)) { ack |= stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL); if (sc->rch_adc.channel) { snd_mtxunlock(sc->lock); chn_intr(sc->rch_adc.channel); snd_mtxlock(sc->lock); } } if (stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL)) { ack |= stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL); if (sc->rch_efx.channel) { snd_mtxunlock(sc->lock); chn_intr(sc->rch_efx.channel); snd_mtxlock(sc->lock); } } snd_mtxunlock(sc->lock); return (ack); } static int emu_pcm_init(struct emu_pcm_info *sc) { sc->bufsz = pcm_getbuffersize(sc->dev, EMUPAGESIZE, EMU_REC_BUFSZ, EMU_MAX_BUFSZ); return (0); } static int emu_pcm_uninit(struct emu_pcm_info *sc __unused) { return (0); } static int emu_pcm_probe(device_t dev) { uintptr_t func, route, r; const char *rt; char buffer[255]; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_FUNC, &func); if (func != SCF_PCM) return (ENXIO); rt = "UNKNOWN"; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_ROUTE, &route); switch (route) { case RT_FRONT: rt = "front"; break; case RT_REAR: rt = "rear"; break; case RT_CENTER: rt = "center"; break; case RT_SUB: rt = "subwoofer"; break; case RT_SIDE: rt = "side"; break; case RT_MCHRECORD: rt = "multichannel recording"; break; } snprintf(buffer, 255, "EMU10Kx DSP %s PCM interface", rt); device_set_desc_copy(dev, buffer); return (0); } static int emu_pcm_attach(device_t dev) { struct emu_pcm_info *sc; unsigned int i; char status[SND_STATUSLEN]; uint32_t inte, ipr; uintptr_t route, r, ivar; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); sc->card = (struct emu_sc_info *)(device_get_softc(device_get_parent(dev))); if (sc->card == NULL) { device_printf(dev, "cannot get bridge conf\n"); free(sc, M_DEVBUF); return (ENXIO); } sc->lock = snd_mtxcreate(device_get_nameunit(dev), "snd_emu10kx pcm softc"); sc->dev = dev; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_ISEMU10K1, &ivar); sc->is_emu10k1 = ivar ? 1 : 0; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_MCH_DISABLED, &ivar); sc->mch_disabled = ivar ? 
1 : 0; sc->codec = NULL; for (i = 0; i < 8; i++) { sc->rt.routing_left[i] = i; sc->rt.amounts_left[i] = 0x00; sc->rt.routing_right[i] = i; sc->rt.amounts_right[i] = 0x00; } for (i = 0; i < 8; i++) { sc->rt_mono.routing_left[i] = i; sc->rt_mono.amounts_left[i] = 0x00; sc->rt_mono.routing_right[i] = i; sc->rt_mono.amounts_right[i] = 0x00; } sc->emu10k1_volcache[0][0] = 75; sc->emu10k1_volcache[1][0] = 75; sc->emu10k1_volcache[0][1] = 75; sc->emu10k1_volcache[1][1] = 75; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_ROUTE, &route); sc->route = route; switch (route) { case RT_FRONT: sc->rt.amounts_left[0] = 0xff; sc->rt.amounts_right[1] = 0xff; sc->rt_mono.amounts_left[0] = 0xff; sc->rt_mono.amounts_left[1] = 0xff; if (sc->is_emu10k1) sc->codec = AC97_CREATE(dev, sc, emu_ac97); else sc->codec = AC97_CREATE(dev, sc, emu_eac97); sc->ac97_mixerclass = NULL; if (sc->codec != NULL) sc->ac97_mixerclass = ac97_getmixerclass(); if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize DSP mixer\n"); goto bad; } break; case RT_REAR: sc->rt.amounts_left[2] = 0xff; sc->rt.amounts_right[3] = 0xff; sc->rt_mono.amounts_left[2] = 0xff; sc->rt_mono.amounts_left[3] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_CENTER: sc->rt.amounts_left[4] = 0xff; sc->rt_mono.amounts_left[4] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_SUB: sc->rt.amounts_left[5] = 0xff; sc->rt_mono.amounts_left[5] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_SIDE: sc->rt.amounts_left[6] = 0xff; sc->rt.amounts_right[7] = 0xff; sc->rt_mono.amounts_left[6] = 0xff; sc->rt_mono.amounts_left[7] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_MCHRECORD: if (mixer_init(dev, &emuefxmixer_class, sc)) { device_printf(dev, "failed to initialize EFX mixer\n"); goto bad; } break; default: device_printf(dev, "invalid default route\n"); goto bad; } inte = EMU_INTE_INTERTIMERENB; ipr = EMU_IPR_INTERVALTIMER; /* Used by playback & ADC */ sc->ihandle = emu_intr_register(sc->card, inte, ipr, &emu_pcm_intr, sc); if (emu_pcm_init(sc) == -1) { device_printf(dev, "unable to initialize PCM part of the card\n"); goto bad; } /* * We don't register interrupt handler with snd_setup_intr * in pcm device. Mark pcm device as MPSAFE manually. */ pcm_setflags(dev, pcm_getflags(dev) | SD_F_MPSAFE); /* XXX we should better get number of available channels from parent */ if (pcm_register(dev, sc, (route == RT_FRONT) ? MAX_CHANNELS : 1, (route == RT_FRONT) ? 
1 : 0)) { device_printf(dev, "can't register PCM channels!\n"); goto bad; } sc->pnum = 0; if (route != RT_MCHRECORD) pcm_addchan(dev, PCMDIR_PLAY, &emupchan_class, sc); if (route == RT_FRONT) { for (i = 1; i < MAX_CHANNELS; i++) pcm_addchan(dev, PCMDIR_PLAY, &emupchan_class, sc); pcm_addchan(dev, PCMDIR_REC, &emurchan_class, sc); } if (route == RT_MCHRECORD) pcm_addchan(dev, PCMDIR_REC, &emufxrchan_class, sc); snprintf(status, SND_STATUSLEN, "on %s", device_get_nameunit(device_get_parent(dev))); pcm_setstatus(dev, status); return (0); bad: if (sc->codec) ac97_destroy(sc->codec); if (sc->lock) snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return (ENXIO); } static int emu_pcm_detach(device_t dev) { int r; struct emu_pcm_info *sc; sc = pcm_getdevinfo(dev); r = pcm_unregister(dev); if (r) return (r); emu_pcm_uninit(sc); if (sc->lock) snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return (0); } static device_method_t emu_pcm_methods[] = { DEVMETHOD(device_probe, emu_pcm_probe), DEVMETHOD(device_attach, emu_pcm_attach), DEVMETHOD(device_detach, emu_pcm_detach), DEVMETHOD_END }; static driver_t emu_pcm_driver = { "pcm", emu_pcm_methods, PCM_SOFTC_SIZE, NULL, 0, NULL }; DRIVER_MODULE(snd_emu10kx_pcm, emu10kx, emu_pcm_driver, pcm_devclass, 0, 0); MODULE_DEPEND(snd_emu10kx_pcm, snd_emu10kx, SND_EMU10KX_MINVER, SND_EMU10KX_PREFVER, SND_EMU10KX_MAXVER); MODULE_DEPEND(snd_emu10kx_pcm, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(snd_emu10kx_pcm, SND_EMU10KX_PREFVER); Index: projects/clang1000-import/sys/fs/msdosfs/msdosfsmount.h =================================================================== --- projects/clang1000-import/sys/fs/msdosfs/msdosfsmount.h (revision 357178) +++ projects/clang1000-import/sys/fs/msdosfs/msdosfsmount.h (revision 357179) @@ -1,263 +1,267 @@ /* $FreeBSD$ */ /* $NetBSD: msdosfsmount.h,v 1.17 1997/11/17 15:37:07 ws Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #ifndef _MSDOSFS_MSDOSFSMOUNT_H_ #define _MSDOSFS_MSDOSFSMOUNT_H_ #if defined (_KERNEL) || defined(MAKEFS) #include +#ifndef MAKEFS #include #include +#endif #include #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MSDOSFSMNT); #endif struct msdosfs_fileno; /* * Layout of the mount control block for a MSDOSFS filesystem. */ struct msdosfsmount { struct mount *pm_mountp;/* vfs mount struct for this fs */ struct g_consumer *pm_cp; struct bufobj *pm_bo; uid_t pm_uid; /* uid to set as owner of the files */ gid_t pm_gid; /* gid to set as owner of the files */ mode_t pm_mask; /* mask to and with file protection bits for files */ mode_t pm_dirmask; /* mask to and with file protection bits for directories */ struct vnode *pm_devvp; /* vnode for character device mounted */ struct cdev *pm_dev; /* character device mounted */ struct bpb50 pm_bpb; /* BIOS parameter blk for this fs */ u_long pm_BlkPerSec; /* How many DEV_BSIZE blocks fit inside a physical sector */ u_long pm_FATsecs; /* actual number of FAT sectors */ u_long pm_fatblk; /* block # of first FAT */ u_long pm_rootdirblk; /* block # (cluster # for FAT32) of root directory number */ u_long pm_rootdirsize; /* size in blocks (not clusters) */ u_long pm_firstcluster; /* block number of first cluster */ u_long pm_maxcluster; /* maximum cluster number */ u_long pm_freeclustercount; /* number of free clusters */ u_long pm_cnshift; /* shift file offset right this amount to get a cluster number */ u_long pm_crbomask; /* and a file offset with this mask to get cluster rel offset */ u_long pm_bnshift; /* shift file offset right this amount to get a block number */ u_long pm_bpcluster; /* bytes per cluster */ u_long pm_fmod; /* ~0 if fs is modified, this can rollover to 0 */ u_long pm_fatblocksize; /* size of FAT blocks in bytes */ u_long pm_fatblocksec; /* size of FAT blocks in sectors */ u_long pm_fatsize; /* size of FAT in bytes */ uint32_t pm_fatmask; /* mask to use for FAT numbers */ u_long pm_fsinfo; /* fsinfo block number */ u_long pm_nxtfree; /* next place to search for a free cluster */ u_int pm_fatmult; /* these 2 values are used in FAT */ u_int pm_fatdiv; /* offset computation */ u_int pm_curfat; /* current FAT for FAT32 (0 otherwise) */ u_int *pm_inusemap; /* ptr to bitmap of in-use clusters */ uint64_t pm_flags; /* see below */ void *pm_u2w; /* Local->Unicode iconv handle */ void *pm_w2u; /* Unicode->Local iconv handle */ void *pm_u2d; /* Unicode->DOS iconv handle */ void *pm_d2u; /* DOS->Local iconv handle */ +#ifndef MAKEFS struct lock pm_fatlock; /* lockmgr protecting 
allocations */ +#endif }; /* * A 64-bit file number and the 32-bit file number to which it is mapped, * in a red-black tree node. */ struct msdosfs_fileno { RB_ENTRY(msdosfs_fileno) mf_tree; uint32_t mf_fileno32; uint64_t mf_fileno64; }; /* Byte offset in FAT on filesystem pmp, cluster cn */ #define FATOFS(pmp, cn) ((cn) * (pmp)->pm_fatmult / (pmp)->pm_fatdiv) #define VFSTOMSDOSFS(mp) ((struct msdosfsmount *)mp->mnt_data) /* Number of bits in one pm_inusemap item: */ #define N_INUSEBITS (8 * sizeof(u_int)) /* * Shorthand for fields in the bpb contained in the msdosfsmount structure. */ #define pm_BytesPerSec pm_bpb.bpbBytesPerSec #define pm_ResSectors pm_bpb.bpbResSectors #define pm_FATs pm_bpb.bpbFATs #define pm_RootDirEnts pm_bpb.bpbRootDirEnts #define pm_Sectors pm_bpb.bpbSectors #define pm_Media pm_bpb.bpbMedia #define pm_SecPerTrack pm_bpb.bpbSecPerTrack #define pm_Heads pm_bpb.bpbHeads #define pm_HiddenSects pm_bpb.bpbHiddenSecs #define pm_HugeSectors pm_bpb.bpbHugeSectors /* * Convert pointer to buffer -> pointer to direntry */ #define bptoep(pmp, bp, dirofs) \ ((struct direntry *)(((bp)->b_data) \ + ((dirofs) & (pmp)->pm_crbomask))) /* * Convert block number to cluster number */ #define de_bn2cn(pmp, bn) \ ((bn) >> ((pmp)->pm_cnshift - (pmp)->pm_bnshift)) /* * Convert cluster number to block number */ #define de_cn2bn(pmp, cn) \ ((cn) << ((pmp)->pm_cnshift - (pmp)->pm_bnshift)) /* * Convert file offset to cluster number */ #define de_cluster(pmp, off) \ ((off) >> (pmp)->pm_cnshift) /* * Clusters required to hold size bytes */ #define de_clcount(pmp, size) \ (((size) + (pmp)->pm_bpcluster - 1) >> (pmp)->pm_cnshift) /* * Convert file offset to block number */ #define de_blk(pmp, off) \ (de_cn2bn(pmp, de_cluster((pmp), (off)))) /* * Convert cluster number to file offset */ #define de_cn2off(pmp, cn) \ ((cn) << (pmp)->pm_cnshift) /* * Convert block number to file offset */ #define de_bn2off(pmp, bn) \ ((bn) << (pmp)->pm_bnshift) /* * Map a cluster number into a filesystem relative block number. */ #define cntobn(pmp, cn) \ (de_cn2bn((pmp), (cn)-CLUST_FIRST) + (pmp)->pm_firstcluster) /* * Calculate block number for directory entry in root dir, offset dirofs */ #define roottobn(pmp, dirofs) \ (de_blk((pmp), (dirofs)) + (pmp)->pm_rootdirblk) /* * Calculate block number for directory entry at cluster dirclu, offset * dirofs */ #define detobn(pmp, dirclu, dirofs) \ ((dirclu) == MSDOSFSROOT \ ? roottobn((pmp), (dirofs)) \ : cntobn((pmp), (dirclu))) #define MSDOSFS_LOCK_MP(pmp) \ lockmgr(&(pmp)->pm_fatlock, LK_EXCLUSIVE, NULL) #define MSDOSFS_UNLOCK_MP(pmp) \ lockmgr(&(pmp)->pm_fatlock, LK_RELEASE, NULL) #define MSDOSFS_ASSERT_MP_LOCKED(pmp) \ lockmgr_assert(&(pmp)->pm_fatlock, KA_XLOCKED) #endif /* _KERNEL || MAKEFS */ #ifndef MAKEFS /* * Arguments to mount MSDOS filesystems. 
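 *
 * (Editor's worked example for the conversion macros above, not part
 * of the original header: on a hypothetical volume with 4 KiB
 * clusters and 512-byte blocks, pm_cnshift = 12, pm_bnshift = 9 and
 * pm_bpcluster = 4096, so:
 *	de_cluster(pmp, 8192)	== 8192 >> 12 == 2
 *	de_cn2bn(pmp, 2)	== 2 << (12 - 9) == 16
 *	de_clcount(pmp, 5000)	== (5000 + 4095) >> 12 == 2
 * i.e. byte offset 8192 lies in relative cluster 2, two clusters span
 * sixteen 512-byte blocks, and a 5000-byte file needs two clusters.)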
*/ struct msdosfs_args { char *fspec; /* blocks special holding the fs to mount */ struct oexport_args export; /* network export information */ uid_t uid; /* uid that owns msdosfs files */ gid_t gid; /* gid that owns msdosfs files */ mode_t mask; /* file mask to be applied for msdosfs perms */ int flags; /* see below */ int unused1; /* unused, was version number */ uint16_t unused2[128]; /* no longer used, was Local->Unicode table */ char *cs_win; /* Windows(Unicode) Charset */ char *cs_dos; /* DOS Charset */ char *cs_local; /* Local Charset */ mode_t dirmask; /* dir mask to be applied for msdosfs perms */ }; #endif /* MAKEFS */ /* * Msdosfs mount options: */ #define MSDOSFSMNT_SHORTNAME 1 /* Force old DOS short names only */ #define MSDOSFSMNT_LONGNAME 2 /* Force Win'95 long names */ #define MSDOSFSMNT_NOWIN95 4 /* Completely ignore Win95 entries */ #define MSDOSFSMNT_KICONV 0x10 /* Use libiconv to convert chars */ /* All flags above: */ #define MSDOSFSMNT_MNTOPT \ (MSDOSFSMNT_SHORTNAME|MSDOSFSMNT_LONGNAME|MSDOSFSMNT_NOWIN95 \ |MSDOSFSMNT_KICONV) #define MSDOSFSMNT_RONLY 0x80000000 /* mounted read-only */ #define MSDOSFSMNT_WAITONFAT 0x40000000 /* mounted synchronous */ #define MSDOSFS_FATMIRROR 0x20000000 /* FAT is mirrored */ #define MSDOSFS_FSIMOD 0x01000000 #endif /* !_MSDOSFS_MSDOSFSMOUNT_H_ */ Index: projects/clang1000-import/sys/fs/nfsserver/nfs_nfsdstate.c =================================================================== --- projects/clang1000-import/sys/fs/nfsserver/nfs_nfsdstate.c (revision 357178) +++ projects/clang1000-import/sys/fs/nfsserver/nfs_nfsdstate.c (revision 357179) @@ -1,8722 +1,8723 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifndef APPLEKEXT #include #include struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; time_t nfsdev_time = 0; int nfsrv_layouthashsize; volatile int nfsrv_layoutcnt = 0; extern int newnfs_numnfsd; extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; extern struct mtx nfsrv_dslock_mtx; extern struct mtx nfsrv_recalllock_mtx; extern struct mtx nfsrv_dontlistlock_mtx; extern int nfsd_debuglevel; extern u_int nfsrv_dsdirsize; extern struct nfsdevicehead nfsrv_devidhead; extern int nfsrv_doflexfile; extern int nfsrv_maxpnfsmirror; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; extern struct nfsdontlisthead nfsrv_dontlisthead; extern volatile int nfsrv_devidcnt; extern struct nfslayouthead nfsrv_recalllisthead; extern char *nfsrv_zeropnfsdat; SYSCTL_DECL(_vfs_nfsd); int nfsrv_statehashsize = NFSSTATEHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN, &nfsrv_statehashsize, 0, "Size of state hash table set via loader.conf"); int nfsrv_clienthashsize = NFSCLIENTHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN, &nfsrv_clienthashsize, 0, "Size of client hash table set via loader.conf"); int nfsrv_lockhashsize = NFSLOCKHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN, &nfsrv_lockhashsize, 0, "Size of file handle hash table set via loader.conf"); int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN, &nfsrv_sessionhashsize, 0, "Size of session hash table set via loader.conf"); int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER; SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN, &nfsrv_layouthighwater, 0, "High water mark for number of layouts set via loader.conf"); static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT; SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN, &nfsrv_v4statelimit, 0, "High water limit for NFSv4 opens+locks+delegations"); static int nfsrv_writedelegifpos = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW, &nfsrv_writedelegifpos, 0, "Issue a write delegation for read opens if possible"); static int nfsrv_allowreadforwriteopen = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW, &nfsrv_allowreadforwriteopen, 0, "Allow Reads to be done with Write Access StateIDs"); int nfsrv_pnfsatime = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW, &nfsrv_pnfsatime, 0, "For pNFS service, do Getattr ops to keep atime up-to-date"); int nfsrv_flexlinuxhack = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW, &nfsrv_flexlinuxhack, 0, "For Linux clients, hack around Flex File Layout bug"); /* * Hash lists for nfs V4. 
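 * (Editor's note, not in the original: each table below is sized by
 * the matching vfs.nfsd.*hashsize tunable declared above and indexed
 * by a cheap hash of the object's id; a clientid, for instance, is
 * mapped to its chain by something of the form
 *	nfsclienthash[clientid.lval[1] % nfsrv_clienthashsize]
 * which is what the NFSCLIENTHASH() macro used throughout this file
 * is assumed to expand to.)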
*/ struct nfsclienthashhead *nfsclienthash; struct nfslockhashhead *nfslockhash; struct nfssessionhash *nfssessionhash; struct nfslayouthash *nfslayouthash; volatile int nfsrv_dontlistlen = 0; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; static time_t nfsrvboottime; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; static volatile int nfsrv_writedelegcnt = 0; static int nfsrv_faildscnt; /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p); static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freenfslock(struct nfslock *lop); static void nfsrv_freenfslockfile(struct nfslockfile *lfp); static void nfsrv_freedeleg(struct nfsstate *); static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, u_int32_t flags, struct nfsstate **stpp); static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp); static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p); static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit); static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags); static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p); static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp); static u_int32_t nfsrv_nextclientindex(void); static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp); static void nfsrv_markstable(struct nfsclient *clp); static void nfsrv_markreclaim(struct nfsclient *clp); static int nfsrv_checkstable(struct nfsclient *clp); static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct vnode *vp, NFSPROC_T *p); static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp); static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p); static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp); static time_t nfsrv_leaseexpiry(void); static void nfsrv_delaydelegtimeout(struct nfsstate *stp); static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op); static int nfsrv_nootherstate(struct nfsstate *stp); static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict 
*cfp, NFSPROC_T *p); static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p); static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p); static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end); static void nfsrv_locklf(struct nfslockfile *lfp); static void nfsrv_unlocklf(struct nfslockfile *lfp); static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid); static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid); static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp); static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp); static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p); static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp); static void nfsrv_freelayoutlist(nfsquad_t clientid); static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode); static void nfsrv_freealllayouts(void); static void nfsrv_freedevid(struct nfsdevice *ds); static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp); static void nfsrv_deleteds(struct nfsdevice *fndds); static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost); static void nfsrv_freealldevids(void); static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt, NFSPROC_T *p); static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p); static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, NFSPROC_T *, struct nfslayout **lypp); static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt); static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); static int nfsrv_dontlayout(fhandle_t *fhp); static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, vnode_t *tvpp); static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp); /* * Scan the client list for a match and either return the current one, * create a new entry or return an error. * If returning a non-error, the clp structure must either be linked into * the client list or free'd. */ APPLESTATIC int nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p) { struct nfsclient *clp = NULL, *new_clp = *new_clpp; int i, error = 0, ret; struct nfsstate *stp, *tstp; #ifdef INET struct sockaddr_in *sin, *rin; #endif #ifdef INET6 struct sockaddr_in6 *sin6, *rin6; #endif struct nfsdsession *sep, *nsep; int zapit = 0, gotit, hasstate = 0, igotlock; static u_int64_t confirm_index = 0; /* * Check for state resource limit exceeded. 
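 * (Editor's aside: nfsrv_openpluslock is the running count of opens,
 * lock owners and delegations, and nfsrv_v4statelimit is the
 * vfs.nfsd.v4statelimit tunable declared above. An administrator who
 * hits NFSERR_RESOURCE here could raise it, e.g.
 *	sysctl vfs.nfsd.v4statelimit=500000
 * (hypothetical value), at the cost of more kernel memory pinned by
 * per-client state.)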
*/ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } if (nfsrv_issuedelegs == 0 || ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0)) /* * Don't do callbacks when delegations are disabled or * for AUTH_GSS unless enabled via nfsrv_nogsscallback. * If establishing a callback connection is attempted * when a firewall is blocking the callback path, the * server may wait too long for the connect attempt to * succeed during the Open. Some clients, such as Linux, * may timeout and give up on the Open before the server * replies. Also, since AUTH_GSS callbacks are not * yet interoperability tested, they might cause the * server to crap out, if they get past the Init call to * the client. */ new_clp->lc_program = 0; /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (new_clp->lc_idlen == clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } if (gotit == 0) i++; } if (!gotit || (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) { if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) { /* * For NFSv4.1, if confirmp->lval[1] is non-zero, the * client is trying to update a confirmed clientid. */ NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); confirmp->lval[1] = 0; error = NFSERR_NOENT; goto out; } /* * Get rid of the old one. */ if (i != nfsrv_clienthashsize) { LIST_REMOVE(clp, lc_hash); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); zapit = 1; } /* * Add it after assigning a client id to it. */ new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = (u_int32_t)nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); LIST_INIT(&new_clp->lc_open); LIST_INIT(&new_clp->lc_deleg); LIST_INIT(&new_clp->lc_olddeleg); LIST_INIT(&new_clp->lc_session); for (i = 0; i < nfsrv_statehashsize; i++) LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if (zapit) nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* * Now, handle the cases where the id is already issued. */ if (nfsrv_notsamecredname(nd, clp)) { /* * Check to see if there is expired state that should go away. */ if (clp->lc_expiry < NFSD_MONOSEC && (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); } /* * If there is outstanding state, then reply NFSERR_CLIDINUSE per * RFC3530 Sec. 8.1.2 last para. 
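 * (Editor's summary of the test below: the clientid counts as "in
 * use" if it still holds any delegation, or if any of its open
 * owners still has at least one open; open owners with no remaining
 * opens do not block reuse of the id.)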
*/ if (!LIST_EMPTY(&clp->lc_deleg)) { hasstate = 1; } else if (LIST_EMPTY(&clp->lc_open)) { hasstate = 0; } else { hasstate = 0; /* Look for an Open on the OpenOwner */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { if (!LIST_EMPTY(&stp->ls_open)) { hasstate = 1; break; } } } if (hasstate) { /* * If the uid doesn't match, return NFSERR_CLIDINUSE after * filling out the correct ipaddr and portnum. */ switch (clp->lc_req.nr_nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam; rin = (struct sockaddr_in *)clp->lc_req.nr_nam; sin->sin_addr.s_addr = rin->sin_addr.s_addr; sin->sin_port = rin->sin_port; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam; rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; sin6->sin6_addr = rin6->sin6_addr; sin6->sin6_port = rin6->sin6_port; break; #endif } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIDINUSE; goto out; } } if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) { /* * If the verifier has changed, the client has rebooted * and a new client id is issued. The old state info * can be thrown away once the SETCLIENTID_CONFIRM occurs. */ LIST_REMOVE(clp, lc_hash); /* Get rid of all sessions on this clientid. */ LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) { ret = nfsrv_freesession(sep, NULL); if (ret != 0) printf("nfsrv_setclient: verifier changed free" " session failed=%d\n", ret); } new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); /* * Save the state until confirmed. */ LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* For NFSv4.1, mark that we found a confirmed clientid. 
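 * (Editor's note: for v4.1 only the reply matters here -
 * confirmp->lval[1] is set non-zero to signal an already-confirmed
 * clientid, while confirmp->lval[0] = 0 is ignored by the client, as
 * the inline comment below says.)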
*/ if ((nd->nd_flag & ND_NFSV41) != 0) { clientidp->lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = clp->lc_clientid.lval[1]; confirmp->lval[0] = 0; /* Ignored by client */ confirmp->lval[1] = 1; } else { /* * id and verifier match, so update the net address info * and get rid of any existing callback authentication * handle, so a new one will be acquired. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN); new_clp->lc_expiry = nfsrv_leaseexpiry(); confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = clp->lc_clientid.lval[1]; new_clp->lc_delegtime = clp->lc_delegtime; new_clp->lc_stateindex = clp->lc_stateindex; new_clp->lc_statemaxindex = clp->lc_statemaxindex; new_clp->lc_cbref = 0; LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if ((nd->nd_flag & ND_NFSV41) == 0) { /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsdclp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; } out: NFSEXITCODE2(error, nd); return (error); } /* * Check to see if the client id exists and optionally confirm it. */ APPLESTATIC int nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int i; struct nfsclienthashhead *hp; int error = 0, igotlock, doneok; struct nfssessionhash *shp; struct nfsdsession *sep; uint64_t sessid[2]; static uint64_t next_sess = 0; if (clpp) *clpp = NULL; if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 || opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* * If called with opflags == CLOPS_RENEW, the State Lock is * already held. Otherwise, we need to get either that or, * for the case of Confirm, lock out the nfsd threads. */ if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); /* * Create a new sessionid here, since we need to do it where * there is a mutex held to serialize update of next_sess. */ if ((nd->nd_flag & ND_NFSV41) != 0) { sessid[0] = ++next_sess; sessid[1] = clientid.qval; } NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSLOCKSTATE(); } /* For NFSv4.1, the clp is acquired from the associated session. 
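 * (Editor's sketch of the lookup below: the 16-byte session id from
 * the SEQUENCE op hashes to a bucket, the bucket is searched for the
 * session, and the session carries its owning client:
 *	shp = NFSSESSIONHASH(nd->nd_sessionid);
 *	sep = nfsrv_findsession(nd->nd_sessionid);
 *	clp = sep->sess_clp;
 * so the v4.1 renew path never scans the clientid hash table.)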
*/ if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 && opflags == CLOPS_RENEW) { clp = NULL; if ((nd->nd_flag & ND_HASSEQUENCE) != 0) { shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep != NULL) clp = sep->sess_clp; NFSUNLOCKSESSION(shp); } } else { hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } } if (clp == NULL) { if (opflags & CLOPS_CONFIRM) error = NFSERR_STALECLIENTID; else error = NFSERR_EXPIRED; } else if (clp->lc_flags & LCL_ADMINREVOKED) { /* * If marked admin revoked, just return the error. */ error = NFSERR_ADMINREVOKED; } if (error) { if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } goto out; } /* * Perform any operations specified by the opflags. */ if (opflags & CLOPS_CONFIRM) { if (((nd->nd_flag & ND_NFSV41) != 0 && clp->lc_confirm.lval[0] != confirm.lval[0]) || ((nd->nd_flag & ND_NFSV41) == 0 && clp->lc_confirm.qval != confirm.qval)) error = NFSERR_STALECLIENTID; else if (nfsrv_notsamecredname(nd, clp)) error = NFSERR_CLIDINUSE; if (!error) { if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) == LCL_NEEDSCONFIRM) { /* * Hang onto the delegations (as old delegations) * for an Open with CLAIM_DELEGATE_PREV unless in * grace, but get rid of the rest of the state. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_olddeleg); if (nfsrv_checkgrace(nd, clp, 0)) { /* In grace, so just delete delegations */ nfsrv_freedeleglist(&clp->lc_deleg); } else { LIST_FOREACH(stp, &clp->lc_deleg, ls_list) stp->ls_flags |= NFSLCK_OLDDELEG; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg, ls_list); } if ((nd->nd_flag & ND_NFSV41) != 0) clp->lc_program = cbprogram; } clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN); if (clp->lc_program) clp->lc_flags |= LCL_NEEDSCBNULL; /* For NFSv4.1, link the session onto the client. */ if (nsep != NULL) { /* Hold a reference on the xprt for a backchannel. */ if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) { if (clp->lc_req.nr_client == NULL) clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, cbprogram, NFSV4_CBVERS); if (clp->lc_req.nr_client != NULL) { SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client->cl_private; /* Disable idle timeout. */ nd->nd_xprt->xp_idletimeout = 0; nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt; } else nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; } NFSBCOPY(sessid, nsep->sess_sessionid, NFSX_V4SESSIONID); NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid, NFSX_V4SESSIONID); shp = NFSSESSIONHASH(nsep->sess_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); LIST_INSERT_HEAD(&shp->list, nsep, sess_hash); LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list); nsep->sess_clp = clp; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); } } } else if (clp->lc_flags & LCL_NEEDSCONFIRM) { error = NFSERR_EXPIRED; } /* * If called by the Renew Op, we must check the principal. 
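 * (Editor's note on the check below: a renew under a different
 * credential name is honored only if that uid also owns at least one
 * of the client's opens; otherwise NFSERR_ACCES is returned, so an
 * unrelated principal cannot keep another client's lease alive.)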
*/ if (!error && (opflags & CLOPS_RENEWOP)) { if (nfsrv_notsamecredname(nd, clp)) { doneok = 0; for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if ((stp->ls_flags & NFSLCK_OPEN) && stp->ls_uid == nd->nd_cred->cr_uid) { doneok = 1; break; } } } if (!doneok) error = NFSERR_ACCES; } if (!error && (clp->lc_flags & LCL_CBDOWN)) error = NFSERR_CBPATHDOWN; } if ((!error || error == NFSERR_CBPATHDOWN) && (opflags & CLOPS_RENEW)) { clp->lc_expiry = nfsrv_leaseexpiry(); } if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } if (clpp) *clpp = clp; out: NFSEXITCODE2(error, nd); return (error); } /* * Perform the NFSv4.1 destroy clientid. */ int nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsclienthashhead *hp; int error = 0, i, igotlock; if (nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } if (clp == NULL) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* Just return ok, since it is gone. */ goto out; } /* * Free up all layouts on the clientid. Should the client return the * layouts? */ nfsrv_freelayoutlist(clientid); /* Scan for state on the clientid. */ for (i = 0; i < nfsrv_statehashsize; i++) if (!LIST_EMPTY(&clp->lc_stateid[i])) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } /* Destroy the clientid and return ok. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); nfsrv_zapclient(clp, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Called from the new nfssvc syscall to admin revoke a clientid. * Returns 0 for success, error otherwise. */ APPLESTATIC int nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) { struct nfsclient *clp = NULL; int i, error = 0; int gotit, igotlock; /* * First, lock out the nfsd so that state won't change while the * revocation record is being written to the stable storage restart * file. */ NFSLOCKV4ROOTMUTEX(); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. 
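 * (Editor's note: unlike the usual clientid lookups, this is a
 * linear walk of every hash chain comparing the opaque
 * (lc_id, lc_idlen) identifier passed in from nfssvc(2); admin
 * revocation is rare, so the O(clients) scan is acceptable.)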
*/ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } i++; } if (!gotit) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); error = EPERM; goto out; } /* * Now, write out the revocation record */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); /* * and clear out the state, marking the clientid revoked. */ clp->lc_flags &= ~LCL_CALLBACKSON; clp->lc_flags |= LCL_ADMINREVOKED; nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); out: NFSEXITCODE(error); return (error); } /* * Dump out stats for all clients. Called from nfssvc(2) to dump out the * NFSv4 client state. */ APPLESTATIC void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) { struct nfsclient *clp; int i = 0, cnt = 0; /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired while dumping the clients. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); /* * Rattle through the client lists until done. */ while (i < nfsrv_clienthashsize && cnt < maxcnt) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; clp = LIST_NEXT(clp, lc_hash); } i++; } if (cnt < maxcnt) dumpp[cnt].ndcl_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd. */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp) { struct nfsstate *stp, *openstp, *lckownstp; struct nfslock *lop; sa_family_t af; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0; dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0; dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0; dumpp->ndcl_flags = clp->lc_flags; dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen; NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen); af = clp->lc_req.nr_nam->sa_family; dumpp->ndcl_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } /* * Now, scan the state lists and total up the opens and locks. */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { dumpp->ndcl_nopenowners++; LIST_FOREACH(openstp, &stp->ls_open, ls_list) { dumpp->ndcl_nopens++; LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) { dumpp->ndcl_nlockowners++; LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) { dumpp->ndcl_nlocks++; } } } } /* * and the delegation lists. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { dumpp->ndcl_ndelegs++; } LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { dumpp->ndcl_nolddelegs++; } } /* * Dump out lock stats for a file. 
*/ APPLESTATIC void nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt, NFSPROC_T *p) { struct nfsstate *stp; struct nfslock *lop; int cnt = 0; struct nfslockfile *lfp; sa_family_t af; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif int ret; fhandle_t nfh; ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock on it cannot be acquired while dumping the locks. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); if (!ret) ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0); if (ret) { ldumpp[0].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); return; } /* * For each open share on file, dump it out. */ stp = LIST_FIRST(&lfp->lf_open); while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_openowner->ls_ownerlen; NFSBCOPY(stp->ls_openowner->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_openowner->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * and all locks. */ lop = LIST_FIRST(&lfp->lf_lock); while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) { stp = lop->lo_stp; ldumpp[cnt].ndlck_flags = lop->lo_flags; ldumpp[cnt].ndlck_first = lop->lo_first; ldumpp[cnt].ndlck_end = lop->lo_end; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen; NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } lop = LIST_NEXT(lop, lo_lckfile); cnt++; } /* * and the delegations. 
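 * (An editor's sketch of the bounded-fill convention used by these dump
 * routines follows.)
 */

/*
 * Editor's aside -- the dump functions above fill a caller-supplied array
 * and, when it is not full, terminate it with a zero-length id as a
 * sentinel.  A self-contained sketch of that convention; all names are
 * invented, and the id values stand in for nonzero id lengths.
 */
#include <stddef.h>

struct dumprec {
	int	dr_idlen;	/* 0 marks the end of valid entries */
	/* ... other dumped fields would go here ... */
};

static size_t
dump_fill(struct dumprec *out, size_t maxcnt, const int *idlens, size_t n)
{
	size_t cnt;

	for (cnt = 0; cnt < maxcnt && cnt < n; cnt++)
		out[cnt].dr_idlen = idlens[cnt];	/* copy one record */
	if (cnt < maxcnt)
		out[cnt].dr_idlen = 0;	/* sentinel, as in the code above */
	return (cnt);
}

/*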
*/ stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = 0; ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * If list isn't full, mark end of list by setting the client name * to zero length. */ if (cnt < maxcnt) ldumpp[cnt].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Server timer routine. It can scan any linked list, so long * as it holds the spin/mutex lock and there is no exclusive lock on * nfsv4rootfs_lock. * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok * to do this from a callout, since the spin locks work. For * Darwin, I'm not sure what will work correctly yet.) * Should be called once per second. */ APPLESTATIC void nfsrv_servertimer(void) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int got_ref, i; /* * Make sure nfsboottime is set. This is used by V3 as well * as V4. Note that nfsboottime is not nfsrvboottime, which is * only used by the V4 server for leases. */ if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); /* * If server hasn't started yet, just return. */ NFSLOCKSTATE(); if (nfsrv_stablefirst.nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) { if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) && NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; } /* * Try and get a reference count on the nfsv4rootfs_lock so that * no nfsd thread can acquire an exclusive lock on it before this * call is done. If it is already exclusively locked, just return. */ NFSLOCKV4ROOTMUTEX(); got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); if (got_ref == 0) { NFSUNLOCKSTATE(); return; } /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC && ((LIST_EMPTY(&clp->lc_deleg) && LIST_EMPTY(&clp->lc_open)) || nfsrv_clients > nfsrv_clienthighwater)) || (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC || (clp->lc_expiry < NFSD_MONOSEC && (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) { /* * Lease has expired several nfsrv_lease times ago: * PLUS * - no state is associated with it * OR * - above high water mark for number of clients * (nfsrv_clienthighwater should be large enough * that this only occurs when clients fail to * use the same nfs_client_id4.id. 
Maybe somewhat * higher than the maximum number of clients that * will mount this server?) * OR * Lease has expired a very long time ago * OR * Lease has expired PLUS the number of opens + locks * has exceeded 90% of capacity * * --> Mark for expiry. The actual expiry will be done * by an nfsd sometime soon. */ clp->lc_flags |= LCL_EXPIREIT; nfsrv_stablefirst.nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* * If there are no opens, increment no open tick cnt. * If time exceeds NFSNOOPEN, mark it to be thrown away; * otherwise, if there is an open, reset the no open time. * Hopefully, this will avoid excessive re-creation * of open owners and subsequent open confirms. */ stp = LIST_FIRST(&clp->lc_open); while (stp != LIST_END(&clp->lc_open)) { nstp = LIST_NEXT(stp, ls_list); if (LIST_EMPTY(&stp->ls_open)) { stp->ls_noopens++; if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit) nfsrv_stablefirst.nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; } stp = nstp; } } } clp = nclp; } } NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * The following set of functions free up the various data structures. */ /* * Clear out all open/lock state related to this nfsclient. * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that * there are no other active nfsd threads. */ APPLESTATIC void nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; struct nfsdsession *sep, *nsep; LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) nfsrv_freeopenowner(stp, 1, p); if ((clp->lc_flags & LCL_ADMINREVOKED) == 0) LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) (void)nfsrv_freesession(sep, NULL); } /* * Free a client that has been cleaned. It should also already have been * removed from the lists. * (Just to be safe w.r.t. newnfs_disconnect(), call this function when * softclock interrupts are enabled.) */ APPLESTATIC void nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) { #ifdef notyet if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) == (LCL_GSS | LCL_CALLBACKSON) && (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) && clp->lc_handlelen > 0) { clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE; clp->lc_hand.nfsh_flag |= NFSG_DESTROYED; (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, 0, p); } #endif newnfs_disconnect(&clp->lc_req); free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); nfsstatsv1.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); } /* * Free a list of delegation state structures. * (This function will also free all nfslockfile structures that no * longer have associated state.) */ APPLESTATIC void nfsrv_freedeleglist(struct nfsstatehead *sthp) { struct nfsstate *stp, *nstp; LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) { nfsrv_freedeleg(stp); } LIST_INIT(sthp); } /* * Free up a delegation. 
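 * (Before the free routines, an editor's sketch of the expiry test used in
 * nfsrv_servertimer() above.)
 */

/*
 * Editor's aside -- the client-expiry test above, distilled into a
 * standalone predicate.  The constants and names are invented stand-ins
 * for NFSRV_STALELEASE, NFSRV_MOULDYLEASE and friends.
 */
#include <stdbool.h>
#include <time.h>

#define STALELEASE	(60 * 5)	/* several lease periods, say */
#define MOULDYLEASE	(60 * 60 * 24)	/* "a very long time" */

static bool
lease_should_expire(time_t now, time_t expiry, bool has_state,
    int nclients, int highwater, int openpluslock, int statelimit)
{
	/* Stale for a while, and either stateless or too many clients. */
	if (expiry + STALELEASE < now && (!has_state || nclients > highwater))
		return (true);
	/* Expired a very long time ago. */
	if (expiry + MOULDYLEASE < now)
		return (true);
	/* Expired and opens + locks are past 90% of the state limit. */
	if (expiry < now && openpluslock * 10 / 9 > statelimit)
		return (true);
	return (false);
}

/*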
*/ static void nfsrv_freedeleg(struct nfsstate *stp) { struct nfslockfile *lfp; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) nfsrv_writedelegcnt--; lfp = stp->ls_lfp; if (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); free(stp, M_NFSDSTATE); nfsstatsv1.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } /* * This function frees an open owner and all associated opens. */ static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; LIST_REMOVE(stp, ls_list); /* * Now, free all associated opens. */ nstp = LIST_FIRST(&stp->ls_open); while (nstp != LIST_END(&stp->ls_open)) { tstp = nstp; nstp = LIST_NEXT(nstp, ls_list); (void) nfsrv_freeopen(tstp, NULL, cansleep, p); } if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvopenowners--; nfsrv_openpluslock--; } /* * This function frees an open (nfsstate open structure) with all associated * lock_owners and locks. It also frees the nfslockfile structure iff there * are no other opens on the file. * Returns 1 if it free'd the nfslockfile, 0 otherwise. */ static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; struct nfslockfile *lfp; int ret; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); lfp = stp->ls_lfp; /* * Now, free all lockowners associated with this open. */ LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp) nfsrv_freelockowner(tstp, vp, cansleep, p); /* * The nfslockfile is freed here if there are no locks * associated with the open. * If there are locks associated with the open, the * nfslockfile structure can be freed via nfsrv_freelockowner(). * Acquire the state mutex to avoid races with calls to * nfsrv_getlockfile(). */ if (cansleep != 0) NFSLOCKSTATE(); if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) { nfsrv_freenfslockfile(lfp); ret = 1; } else ret = 0; if (cansleep != 0) NFSUNLOCKSTATE(); free(stp, M_NFSDSTATE); nfsstatsv1.srvopens--; nfsrv_openpluslock--; return (ret); } /* * Frees a lockowner and all associated locks. */ static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); nfsrv_freeallnfslocks(stp, vp, cansleep, p); if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvlockowners--; nfsrv_openpluslock--; } /* * Free all the nfs locks on a lockowner. */ static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfslock *lop, *nlop; struct nfsrollback *rlp, *nrlp; struct nfslockfile *lfp = NULL; int gottvp = 0; vnode_t tvp = NULL; uint64_t first, end; if (vp != NULL) ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked"); lop = LIST_FIRST(&stp->ls_lock); while (lop != LIST_END(&stp->ls_lock)) { nlop = LIST_NEXT(lop, lo_lckowner); /* * Since all locks should be for the same file, lfp should * not change. 
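 * (An editor's sketch of the safe-iteration teardown idiom used by these
 * free routines follows.)
 */

/*
 * Editor's aside -- the free routines above repeatedly use the idiom
 * "save the successor, then free the current node", either via
 * LIST_FOREACH_SAFE() or an explicit LIST_NEXT() before the free.  A
 * self-contained sketch with an invented node type:
 */
#include <stdlib.h>
#include <sys/queue.h>

struct node {
	LIST_ENTRY(node) n_link;
};
LIST_HEAD(nodehead, node);

static void
teardown(struct nodehead *hp)
{
	struct node *np, *nnp;

	/* nnp holds the successor, so freeing np cannot corrupt the walk. */
	LIST_FOREACH_SAFE(np, hp, n_link, nnp) {
		LIST_REMOVE(np, n_link);
		free(np);
	}
	LIST_INIT(hp);	/* leave an empty head, as nfsrv_freedeleglist() does */
}

/*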
*/ if (lfp == NULL) lfp = lop->lo_lfp; else if (lfp != lop->lo_lfp) panic("allnfslocks"); /* * If vp is NULL and cansleep != 0, a vnode must be acquired * from the file handle. This only occurs when called from * nfsrv_cleanclient(). */ if (gottvp == 0) { if (nfsrv_dolocallocks == 0) tvp = NULL; else if (vp == NULL && cansleep != 0) { tvp = nfsvno_getvp(&lfp->lf_fh); - NFSVOPUNLOCK(tvp); + if (tvp != NULL) + NFSVOPUNLOCK(tvp); } else tvp = vp; gottvp = 1; } if (tvp != NULL) { if (cansleep == 0) panic("allnfs2"); first = lop->lo_first; end = lop->lo_end; nfsrv_freenfslock(lop); nfsrv_localunlock(tvp, lfp, first, end, p); LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } else nfsrv_freenfslock(lop); lop = nlop; } if (vp == NULL && tvp != NULL) vrele(tvp); } /* * Free an nfslock structure. */ static void nfsrv_freenfslock(struct nfslock *lop) { if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); nfsstatsv1.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); free(lop, M_NFSDLOCK); } /* * This function frees an nfslockfile structure. */ static void nfsrv_freenfslockfile(struct nfslockfile *lfp) { LIST_REMOVE(lfp, lf_hash); free(lfp, M_NFSDLOCKFILE); } /* * This function looks up an nfsstate structure via stateid. */ static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags, struct nfsstate **stpp) { struct nfsstate *stp; struct nfsstatehead *hp; int error = 0; *stpp = NULL; hp = NFSSTATEHASH(clp, *stateidp); LIST_FOREACH(stp, hp, ls_hash) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } /* * If no state id in list, return NFSERR_BADSTATEID. */ if (stp == LIST_END(hp)) { error = NFSERR_BADSTATEID; goto out; } *stpp = stp; out: NFSEXITCODE(error); return (error); } /* * This function gets an nfsstate structure via owner string. */ static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp) { struct nfsstate *stp; *stpp = NULL; LIST_FOREACH(stp, hp, ls_list) { if (new_stp->ls_ownerlen == stp->ls_ownerlen && !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) { *stpp = stp; return; } } } /* * Lock control function called to update lock status. * Returns 0 upon success, -1 if there is no lock and the flags indicate * that one isn't to be created and an NFSERR_xxx for other errors. * The structures new_stp and new_lop are passed in as pointers that should * be set to NULL if the structure is used and shouldn't be free'd. * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are * never used and can safely be allocated on the stack. For all other * cases, *new_stpp and *new_lopp should be malloc'd before the call, * in case they are used. 
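 * (A distilled form of the fix in this hunk follows.)
 */

/*
 * Editor's aside -- the one functional change in this hunk is above:
 * nfsvno_getvp() can fail (for example, on a stale file handle), so the
 * returned vnode must be NULL-checked before NFSVOPUNLOCK() is applied.
 * The shape of the fix as a standalone sketch; all names are invented.
 */
#include <stddef.h>

struct xvnode { int locked; };

/* Stand-in for nfsvno_getvp(): returns a locked vnode, or NULL. */
static struct xvnode *
xgetvp(struct xvnode *cache, int found)
{
	if (!found)
		return (NULL);	/* e.g. a stale file handle */
	cache->locked = 1;
	return (cache);
}

/* Stand-in for NFSVOPUNLOCK(): must never be handed NULL. */
static void
xunlock(struct xvnode *vp)
{
	vp->locked = 0;
}

static struct xvnode *
xget_unlocked(struct xvnode *cache, int found)
{
	struct xvnode *vp;

	vp = xgetvp(cache, found);
	if (vp != NULL)		/* the check this revision adds */
		xunlock(vp);
	return (vp);
}

/*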
*/ APPLESTATIC int nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, struct nfslock **new_lopp, struct nfslockconflict *cfp, nfsquad_t clientid, nfsv4stateid_t *stateidp, __unused struct nfsexstuff *exp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfslock *lop; struct nfsstate *new_stp = *new_stpp; struct nfslock *new_lop = *new_lopp; struct nfsstate *tstp, *mystp, *nstp; int specialid = 0; struct nfslockfile *lfp; struct nfslock *other_lop = NULL; struct nfsstate *stp, *lckstp = NULL; struct nfsclient *clp = NULL; u_int32_t bits; int error = 0, haslock = 0, ret, reterr; int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0; fhandle_t nfh; uint64_t first, end; uint32_t lock_flags; if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Note the special cases of "all 1s" or "all 0s" stateids and * let reads with all 1s go ahead. */ if (new_stp->ls_stateid.seqid == 0x0 && new_stp->ls_stateid.other[0] == 0x0 && new_stp->ls_stateid.other[1] == 0x0 && new_stp->ls_stateid.other[2] == 0x0) specialid = 1; else if (new_stp->ls_stateid.seqid == 0xffffffff && new_stp->ls_stateid.other[0] == 0xffffffff && new_stp->ls_stateid.other[1] == 0xffffffff && new_stp->ls_stateid.other[2] == 0xffffffff) specialid = 2; } /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, specialid); if (error) goto out; /* * Check for state resource limit exceeded. */ if ((new_stp->ls_flags & NFSLCK_LOCK) && nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } /* * For the lock case, get another nfslock structure, * just in case we need it. * Malloc now, before we start sifting through the linked lists, * in case we have to wait for memory. */ tryagain: if (new_stp->ls_flags & NFSLCK_LOCK) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); filestruct_locked = 0; reterr = 0; lfp = NULL; /* * Get the lockfile structure for CFH now, so we can do a sanity * check against the stateid, before incrementing the seqid#, since * we want to return NFSERR_BADSTATEID on failure and the seqid# * shouldn't be incremented for this case. * If nfsrv_getlockfile() returns -1, it means "not found", which * will be handled later. * If we are doing Lock/LockU and local locking is enabled, sleep * lock the nfslockfile structure. */ getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p); NFSLOCKSTATE(); if (getlckret == 0) { if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 && nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) { getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 1); if (getlckret == 0) filestruct_locked = 1; } else getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 0); } if (getlckret != 0 && getlckret != -1) reterr = getlckret; if (filestruct_locked != 0) { LIST_INIT(&lfp->lf_rollback); if ((new_stp->ls_flags & NFSLCK_LOCK)) { /* * For local locking, do the advisory locking now, so * that any conflict can be detected. A failure later * can be rolled back locally. If an error is returned, * struct nfslockfile has been unlocked and any local * locking rolled back. 
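 * (A compact version of the special-stateid test above follows.)
 */

/*
 * Editor's aside -- a compact, standalone version of the special stateid
 * test above: all-zeros is the anonymous stateid, all-ones the READ
 * bypass stateid.  The struct layout is an invented stand-in for
 * nfsv4stateid_t.
 */
#include <stdint.h>

struct xstateid {
	uint32_t seqid;
	uint32_t other[3];
};

/* Returns 0 for a normal stateid, 1 for all-zeros, 2 for all-ones. */
static int
stateid_special(const struct xstateid *sp)
{
	if (sp->seqid == 0 && sp->other[0] == 0 && sp->other[1] == 0 &&
	    sp->other[2] == 0)
		return (1);
	if (sp->seqid == 0xffffffff && sp->other[0] == 0xffffffff &&
	    sp->other[1] == 0xffffffff && sp->other[2] == 0xffffffff)
		return (2);
	return (0);
}

/*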
*/ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } reterr = nfsrv_locallock(vp, lfp, (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)), new_lop->lo_first, new_lop->lo_end, cfp, p); NFSLOCKSTATE(); } } if (specialid == 0) { if (new_stp->ls_flags & NFSLCK_TEST) { /* * RFC 3530 does not list LockT as an op that renews a * lease, but the consensus seems to be that it is ok * for a server to do so. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid * error returns for LockT, just go ahead and test for a lock, * since there are no locks for this client, but other locks * can conflict. (ie. same client will always be false) */ if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED) error = 0; lckstp = new_stp; } else { error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) /* * Look up the stateid */ error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * do some sanity checks for an unconfirmed open or a * stateid that refers to the wrong file, for an open stateid */ if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) && ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) || (getlckret == 0 && stp->ls_lfp != lfp))){ /* * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID * The only exception is using SETATTR with SIZE. * */ if ((new_stp->ls_flags & (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR) error = NFSERR_BADSTATEID; } if (error == 0 && (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) && getlckret == 0 && stp->ls_lfp != lfp) error = NFSERR_BADSTATEID; /* * If the lockowner stateid doesn't refer to the same file, * I believe that is considered ok, since some clients will * only create a single lockowner and use that for all locks * on all files. * For now, log it as a diagnostic, instead of considering it * a BadStateid. */ if (error == 0 && (stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 && getlckret == 0 && stp->ls_lfp != lfp) { #ifdef DIAGNOSTIC printf("Got a lock statid for different file open\n"); #endif /* error = NFSERR_BADSTATEID; */ } if (error == 0) { if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) { /* * If haslock set, we've already checked the seqid. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); else error = NFSERR_BADSTATEID; } if (!error) nfsrv_getowner(&stp->ls_open, new_stp, &lckstp); if (lckstp) /* * I believe this should be an error, but it * isn't obvious what NFSERR_xxx would be * appropriate, so I'll use NFSERR_INVAL for now. */ error = NFSERR_INVAL; else lckstp = new_stp; } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) { /* * If haslock set, ditto above. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = NFSERR_BADSTATEID; else error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp, new_stp->ls_op); } lckstp = stp; } else { lckstp = stp; } } /* * If the seqid part of the stateid isn't the same, return * NFSERR_OLDSTATEID for cases other than I/O Ops. * For I/O Ops, only return NFSERR_OLDSTATEID if * nfsrv_returnoldstateid is set. 
(The consensus on the email * list was that most clients would prefer to not receive * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that * is what will happen, so I use the nfsrv_returnoldstateid to * allow for either server configuration.) */ if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && (!(new_stp->ls_flags & NFSLCK_CHECK) || nfsrv_returnoldstateid)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; } } /* * Now we can check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If we successfully Reclaimed state, note that. */ if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error) nfsrv_markstable(clp); /* * At this point, either error == NFSERR_BADSTATEID or the * seqid# has been updated, so we can return any error. * If error == 0, there may be an error in: * nd_repstat - Set by the calling function. * reterr - Set above, if getting the nfslockfile structure * or acquiring the local lock failed. * (If both of these are set, nd_repstat should probably be * returned, since that error was detected before this * function call.) */ if (error != 0 || nd->nd_repstat != 0 || reterr != 0) { if (error == 0) { if (nd->nd_repstat != 0) error = nd->nd_repstat; else error = reterr; } if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Check the nfsrv_getlockfile return. * Returned -1 if no structure found. */ if (getlckret == -1) { error = NFSERR_EXPIRED; /* * Called from lockt, so no lock is OK. */ if (new_stp->ls_flags & NFSLCK_TEST) { error = 0; } else if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Called to check for a lock, OK if the stateid is all * 1s or all 0s, but there should be an nfsstate * otherwise. * (ie. If there is no open, I'll assume no share * deny bits.) */ if (specialid) error = 0; else error = NFSERR_BADSTATEID; } NFSUNLOCKSTATE(); goto out; } /* * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict. * For NFSLCK_CHECK, allow a read if write access is granted, * but check for a deny. For NFSLCK_LOCK, require correct access, * which implies a conflicting deny can't exist. */ if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) { /* * Four kinds of state id: * - specialid (all 0s or all 1s), only for NFSLCK_CHECK * - stateid for an open * - stateid for a delegation * - stateid for a lock owner */ if (!specialid) { if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { delegation = 1; mystp = stp; nfsrv_delaydelegtimeout(stp); } else if (stp->ls_flags & NFSLCK_OPEN) { mystp = stp; } else { mystp = stp->ls_openstp; } /* * If locking or checking, require correct access * bit set. 
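 * (The old-stateid rule just described, as a small predicate:)
 */

/*
 * Editor's aside -- the NFSERR_OLDSTATEID rule described above, as a
 * small standalone predicate with invented names.  In NFSv4.1 a request
 * seqid of 0 is a wildcard; in NFSv4.0 the error is suppressed for I/O
 * ops unless the server is configured to return it.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
stateid_is_old(uint32_t cur_seqid, uint32_t req_seqid, bool v41,
    bool is_io_op, bool return_old_for_io)
{
	if (req_seqid == cur_seqid)
		return (false);			/* seqids match: not old */
	if (v41)
		return (req_seqid != 0);	/* 0 is the v4.1 wildcard */
	return (!is_io_op || return_old_for_io);
}

/*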
*/ if (((new_stp->ls_flags & NFSLCK_LOCK) && !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) & mystp->ls_flags & NFSLCK_ACCESSBITS)) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) == (NFSLCK_CHECK | NFSLCK_READACCESS) && !(mystp->ls_flags & NFSLCK_READACCESS) && nfsrv_allowreadforwriteopen == 0) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) == (NFSLCK_CHECK | NFSLCK_WRITEACCESS) && !(mystp->ls_flags & NFSLCK_WRITEACCESS))) { if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl3"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); error = NFSERR_OPENMODE; goto out; } } else mystp = NULL; if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) { /* * Check for a conflicting deny bit. */ LIST_FOREACH(tstp, &lfp->lf_open, ls_file) { if (tstp != mystp) { bits = tstp->ls_flags; bits >>= NFSLCK_SHIFT; if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked1")); ret = nfsrv_clientconflict(tstp->ls_clp, &haslock, vp, p); if (ret == 1) { /* * nfsrv_clientconflict unlocks state * when it returns non-zero. */ lckstp = NULL; goto tryagain; } if (ret == 0) NFSUNLOCKSTATE(); if (ret == 2) error = NFSERR_PERM; else error = NFSERR_OPENMODE; goto out; } } } /* We're outta here */ NFSUNLOCKSTATE(); goto out; } } /* * For setattr, just get rid of all the Delegations for other clients. */ if (new_stp->ls_flags & NFSLCK_SETATTR) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked2")); ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (ret) { /* * nfsrv_cleandeleg() unlocks state when it * returns non-zero. */ if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } if (!(new_stp->ls_flags & NFSLCK_CHECK) || (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg))) { NFSUNLOCKSTATE(); goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * I currently believe the conflict algorithm to be: * For Lock Ops (Lock/LockT/LockU) * - there is a conflict iff a different client has a write delegation * For Reading (Read Op) * - there is a conflict iff a different client has a write delegation * (the specialids are always a different client) * For Writing (Write/Setattr of size) * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (I don't understand why this isn't allowed, but that seems to be * the current consensus?) */ tstp = LIST_FIRST(&lfp->lf_deleg); while (tstp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(tstp, ls_file); if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))|| ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_READ))) && clp != tstp->ls_clp && (tstp->ls_flags & NFSLCK_DELEGWRITE)) || ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_WRITE) && (clp != tstp->ls_clp || (tstp->ls_flags & NFSLCK_DELEGREAD)))) { ret = 0; if (filestruct_locked != 0) { /* Roll back local locks. 
*/ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4"); NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if (VN_IS_DOOMED(vp)) ret = NFSERR_SERVERFAULT; NFSLOCKSTATE(); } if (ret == 0) ret = nfsrv_delegconflict(tstp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict unlocks state when it * returns non-zero, which it always does. */ if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } /* Never gets here. */ } tstp = nstp; } /* * Handle the unlock case by calling nfsrv_updatelock(). * (Should I have done some access checking above for unlock? For now, * just let it happen.) */ if (new_stp->ls_flags & NFSLCK_UNLOCK) { first = new_lop->lo_first; end = new_lop->lo_end; nfsrv_updatelock(stp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(stp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = stp->ls_stateid.seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; if (filestruct_locked != 0) { NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } /* Update the local locks. */ nfsrv_localunlock(vp, lfp, first, end, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Search for a conflicting lock. A lock conflicts if: * - the lock range overlaps and * - at least one lock is a write lock and * - it is not owned by the same lock owner */ if (!delegation) { LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { if (new_lop->lo_end > lop->lo_first && new_lop->lo_first < lop->lo_end && (new_lop->lo_flags == NFSLCK_WRITE || lop->lo_flags == NFSLCK_WRITE) && lckstp != lop->lo_stp && (clp != lop->lo_stp->ls_clp || lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen || NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner, lckstp->ls_ownerlen))) { if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (vnode_unlocked != 0) ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, NULL, p); else ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, vp, p); if (ret == 1) { if (filestruct_locked != 0) { if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6"); NFSVOPUNLOCK(vp); } /* Roll back local locks. */ nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if (VN_IS_DOOMED(vp)) { error = NFSERR_SERVERFAULT; goto out; } } /* * nfsrv_clientconflict() unlocks state when it * returns non-zero. */ lckstp = NULL; goto tryagain; } /* * Found a conflicting lock, so record the conflict and * return the error. 
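 * (The conflict test from the search loop above, factored out:)
 */

/*
 * Editor's aside -- the byte-range conflict test in the search loop
 * above, factored into a standalone predicate: the ranges overlap, at
 * least one lock is a write lock, and the owners differ.  Invented
 * types; owner identity is reduced to an integer for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>

struct xlock {
	uint64_t first;	/* inclusive start */
	uint64_t end;	/* exclusive end */
	bool	write;
	int	owner;
};

static bool
lock_conflicts(const struct xlock *a, const struct xlock *b)
{
	return (a->end > b->first && a->first < b->end &&
	    (a->write || b->write) && a->owner != b->owner);
}

/*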
*/ if (cfp != NULL && ret == 0) { cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0]; cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1]; cfp->cl_first = lop->lo_first; cfp->cl_end = lop->lo_end; cfp->cl_flags = lop->lo_flags; cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen; NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner, cfp->cl_ownerlen); } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else if (new_stp->ls_flags & NFSLCK_CHECK) error = NFSERR_LOCKED; else error = NFSERR_DENIED; if (filestruct_locked != 0 && ret == 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } if (ret == 0) NFSUNLOCKSTATE(); goto out; } } } /* * We only get here if there was no lock that conflicted. */ if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) { NFSUNLOCKSTATE(); goto out; } /* * We only get here when we are creating or modifying a lock. * There are two variants: * - exist_lock_owner where lock_owner exists * - open_to_lock_owner with new lock_owner */ first = new_lop->lo_first; end = new_lop->lo_end; lock_flags = new_lop->lo_flags; if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) { nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(lckstp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = lckstp->ls_stateid.seqid = 1; stateidp->other[0] = lckstp->ls_stateid.other[0]; stateidp->other[1] = lckstp->ls_stateid.other[1]; stateidp->other[2] = lckstp->ls_stateid.other[2]; } else { /* * The new open_to_lock_owner case. * Link the new nfsstate into the lists. */ new_stp->ls_seq = new_stp->ls_opentolockseq; nfsrvd_refcache(new_stp->ls_op); stateidp->seqid = new_stp->ls_stateid.seqid = 1; stateidp->other[0] = new_stp->ls_stateid.other[0] = clp->lc_clientid.lval[0]; stateidp->other[1] = new_stp->ls_stateid.other[1] = clp->lc_clientid.lval[1]; stateidp->other[2] = new_stp->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_stp->ls_clp = clp; LIST_INIT(&new_stp->ls_lock); new_stp->ls_openstp = stp; new_stp->ls_lfp = lfp; nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp, lfp); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid), new_stp, ls_hash); LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; nfsstatsv1.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { NFSUNLOCKSTATE(); nfsrv_locallock_commit(lfp, lock_flags, first, end); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); out: if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (vnode_unlocked != 0) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (error == 0 && VN_IS_DOOMED(vp)) error = NFSERR_SERVERFAULT; } if (other_lop) free(other_lop, M_NFSDLOCK); NFSEXITCODE2(error, nd); return (error); } /* * Check for state errors for Open. * repstat is passed back out as an error if more critical errors * are not detected. 
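 * (How the open_to_lock_owner path above mints a fresh stateid, sketched:)
 */

/*
 * Editor's aside -- the open_to_lock_owner path above mints a fresh
 * stateid: seqid starts at 1, other[0] and other[1] carry the clientid
 * words, and other[2] is a per-client index.  A sketch with invented
 * names (xstateid mirrors nfsv4stateid_t; nextindex() stands in for
 * nfsrv_nextstateindex()).
 */
#include <stdint.h>

struct xstateid {
	uint32_t seqid;
	uint32_t other[3];
};

static uint32_t
nextindex(uint32_t *counter)
{
	return (++*counter);	/* per-client, monotonically increasing */
}

static void
stateid_mint(struct xstateid *sp, uint32_t clid_boot, uint32_t clid_idx,
    uint32_t *counter)
{
	sp->seqid = 1;			/* new state starts at seqid 1 */
	sp->other[0] = clid_boot;	/* like lc_clientid.lval[0] */
	sp->other[1] = clid_idx;	/* like lc_clientid.lval[1] */
	sp->other[2] = nextindex(counter);
}

/*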
*/ APPLESTATIC int nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd, NFSPROC_T *p, int repstat) { struct nfsstate *stp, *nstp; struct nfsclient *clp; struct nfsstate *ownerstp; struct nfslockfile *lfp, *new_lfp; int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; /* * Check for state resource limit exceeded. * Technically this should be SMP protected, but the worst * case error is "out by one or two" on the count when it * returns NFSERR_RESOURCE and the limit is just a rather * arbitrary high water mark, so no harm is done. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); if (vp) getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the nfsclient structure. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (!error && ownerstp) { error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp, new_stp->ls_op); /* * If the OpenOwner hasn't been confirmed, assume the * old one was a replay and this one is ok. * See: RFC3530 Sec. 14.2.18. */ if (error == NFSERR_BADSEQID && (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM)) error = 0; } /* * Check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If none of the above errors occurred, let repstat be * returned. */ if (repstat && !error) error = repstat; if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * If vp == NULL, the file doesn't exist yet, so return ok. * (This always happens on the first pass, so haslock must be 0.) */ if (vp == NULL) { NFSUNLOCKSTATE(); free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? 
*/ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } } /* * Check for access/deny bit conflicts. I check for the same * owner as well, in case the client didn't bother. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) && (((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks * state when it returns non-zero. */ goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) * Don't check for a Reclaim, since that will be dealt with * by nfsrv_openctrl(). */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ if (ret == -1) goto tryagain; error = ret; goto out; } } stp = nstp; } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Open control function to create/update open state for an open. 
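 * (The share-reservation test used in the loops above, distilled:)
 */

/*
 * Editor's aside -- the access/deny conflict test that appears in both
 * nfsrv_opencheck() and nfsrv_openctrl(), distilled: an open conflicts
 * when its access bits hit the other open's deny bits (stored shifted
 * left by a constant) in either direction.  The bit values here are
 * invented; the kernel uses NFSLCK_SHIFT and NFSLCK_ACCESSBITS.
 */
#include <stdbool.h>
#include <stdint.h>

#define XACCESSBITS	0x3	/* read | write access */
#define XSHIFT		2	/* deny bits live above the access bits */

static bool
share_conflict(uint32_t new_flags, uint32_t cur_flags)
{
	return (((new_flags & XACCESSBITS) &
	    ((cur_flags >> XSHIFT) & XACCESSBITS)) != 0 ||
	    ((cur_flags & XACCESSBITS) &
	    ((new_flags >> XSHIFT) & XACCESSBITS)) != 0);
}

/*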
*/ APPLESTATIC int nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp, nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp, NFSPROC_T *p, u_quad_t filerev) { struct nfsstate *new_stp = *new_stpp; struct nfsstate *stp, *nstp; struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg; struct nfslockfile *lfp, *new_lfp; struct nfsclient *clp; int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1; int readonly = 0, cbret = 1, getfhret = 0; int gotstate = 0, len = 0; u_char *clidp = NULL; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). * (Paranoia, should have been detected by nfsrv_opencheck().) * If an error does show up, return NFSERR_EXPIRED, since * the seqid# has already been incremented. */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) { printf("Nfsd: openctrl unexpected restart err=%d\n", error); error = NFSERR_EXPIRED; goto out; } clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); new_open = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); new_deleg = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the client structure. Since the linked lists could be changed * by other nfsd processes if this process does a tsleep(), one of * two things must be done. * 1 - don't tsleep() * or * 2 - get the nfsv4_lock() { indicated by haslock == 1 } * before using the lists, since this lock stops the other * nfsd. This should only be used for rare cases, since it * essentially single threads the nfsd. * At this time, it is only done for cases where the stable * storage file must be written prior to completion of state * expiration. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) && clp->lc_program) { /* * This happens on the first open for a client * that supports callbacks. */ NFSUNLOCKSTATE(); /* * Although nfsrv_docallback() will sleep, clp won't * go away, since they are only removed when the * nfsv4_lock() has blocked the nfsd threads. The * fields in clp can change, but having multiple * threads do this Null callback RPC should be * harmless. */ cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, 0, p); NFSLOCKSTATE(); clp->lc_flags &= ~LCL_NEEDSCBNULL; if (!cbret) clp->lc_flags |= LCL_CALLBACKSON; } /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (error) { NFSUNLOCKSTATE(); printf("Nfsd: openctrl unexpected state err=%d\n", error); free(new_lfp, M_NFSDLOCKFILE); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } if (new_stp->ls_flags & NFSLCK_RECLAIM) nfsrv_markstable(clp); /* * Get the structure for the underlying file. 
*/ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected getlockfile err=%d\n", error); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected expiry\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } /* * Don't issue a Delegation, since one already exists and * delay delegation timeout, as required. */ delegate = 0; nfsrv_delaydelegtimeout(stp); } /* * Check for access/deny bit conflicts. I also check for the * same owner, since the client might not have bothered to check. * Also, note an open for the same file and owner, if found, * which is all we do here for Delegate_Cur, since conflict * checking is already done. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (ownerstp && stp->ls_openowner == ownerstp) openstp = stp; if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) { /* * If another client has the file open, the only * delegation that can be issued is a Read delegation * and only if it is a Read open with Deny none. */ if (clp != stp->ls_clp) { if ((stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) writedeleg = 0; else delegate = 0; } if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks state * when it returns non-zero. */ free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); openstp = NULL; goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); printf("nfsd openctrl unexpected client cnfl\n"); goto out; } } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) 
* I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD)) writedeleg = 0; else delegate = 0; if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { if (new_stp->ls_flags & NFSLCK_RECLAIM) { delegate = 2; } else { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ printf("Nfsd openctrl unexpected deleg cnfl\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (ret == -1) { openstp = NULL; goto tryagain; } error = ret; goto out; } } } stp = nstp; } } /* * We only get here if there was no open that conflicted. * If an open for this owner already exists, the new access/deny bits * are merged into it; otherwise it is a new open. If the open_owner * hasn't been confirmed, replace the open with the new one needing * confirmation, otherwise add the open. */ if (new_stp->ls_flags & NFSLCK_DELEGPREV) { /* * Handle NFSLCK_DELEGPREV by searching the old delegations for * a match. If found, just move the old delegation to the current * delegation list and issue open. If not found, return * NFSERR_EXPIRED. */ LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { if (stp->ls_lfp == lfp) { /* Found it */ if (stp->ls_clp != clp) panic("olddeleg clp"); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_hash); stp->ls_flags &= ~NFSLCK_OLDDELEG; stp->ls_stateid.seqid = delegstateidp->seqid = 1; stp->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; stp->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; stp->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); stp->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, stp->ls_stateid), stp, ls_hash); if (stp->ls_flags & NFSLCK_DELEGWRITE) *rflagsp |= NFSV4OPEN_WRITEDELEGATE; else *rflagsp |= NFSV4OPEN_READDELEGATE; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; /* * Now, do the associated open. 
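 * (Editor's aside first -- the delegation-conflict rule above, reduced to
 * a predicate -- then the open itself is created below.)
 */

/*
 * The conflict algorithm described in the comment above, as a standalone
 * predicate with invented names: a read-only open conflicts only with
 * another client's write delegation; any other open conflicts with
 * another client's delegation of either kind, or with this client's own
 * read delegation.
 */
#include <stdbool.h>

static bool
open_deleg_conflict(bool readonly_open, bool same_client,
    bool deleg_is_write, bool deleg_is_read)
{
	if (readonly_open)
		return (!same_client && deleg_is_write);
	return (!same_client || deleg_is_read);
}

/*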
*/ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)| NFSLCK_OPEN; if (stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } } if (stp == LIST_END(&clp->lc_olddeleg)) error = NFSERR_EXPIRED; } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { /* * Scan to check that a delegation for this client and file * does not already exist. * There also shouldn't yet be an Open for this file and * openowner. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_clp == clp) break; } if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) { /* * This is the Claim_Previous case with a delegation * type != Delegate_None. */ /* * First, add the delegation. (Although we must issue the * delegation, we can also ask for an immediate return.) */ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (new_stp->ls_flags & NFSLCK_DELEGWRITE) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; if (delegate == 2 || nfsrv_issuedelegs == 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; /* * Now, do the associated open. 
*/ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) | NFSLCK_OPEN; if (new_stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; } } else if (ownerstp) { if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) { /* Replace the open */ if (ownerstp->ls_op) nfsrvd_derefcache(ownerstp->ls_op); ownerstp->ls_op = new_stp->ls_op; nfsrvd_refcache(ownerstp->ls_op); ownerstp->ls_seq = new_stp->ls_seq; *rflagsp |= NFSV4OPEN_RESULTCONFIRM; stp = LIST_FIRST(&ownerstp->ls_open); stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; stp->ls_stateid.seqid = 1; stp->ls_uid = new_stp->ls_uid; if (lfp != stp->ls_lfp) { LIST_REMOVE(stp, ls_file); LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file); stp->ls_lfp = lfp; } openstp = stp; } else if (openstp) { openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS); openstp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && openstp->ls_stateid.seqid == 0) openstp->ls_stateid.seqid = 1; /* * This is where we can choose to issue a delegation. 
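 * Roughly, the tests below map onto the "why no delegation" reply
 * flags:
 *	client asked for none      -> NFSV4OPEN_WDNOTWANTED
 *	nfsrv_issuedelegs == 0     -> NFSV4OPEN_WDSUPPFTYPE
 *	delegation limit reached   -> NFSV4OPEN_WDRESOURCE
 *	any other conflict         -> NFSV4OPEN_WDCONTENTION
 * and only when none of them apply is a write delegation issued.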
*/ if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else if (nfsrv_issuedelegs == 0) *rflagsp |= NFSV4OPEN_WDSUPPFTYPE; else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if (delegate == 0 || writedeleg == 0 || NFSVNO_EXRDONLY(exp) || (readonly != 0 && nfsrv_writedelegifpos == 0) || !NFSVNO_DELEGOK(vp) || (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; nfsrv_writedelegcnt++; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } else { new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)| NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; new_open->ls_openowner = ownerstp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* * This is where we can choose to issue a delegation. 
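 * (Unlike the upgrade case above, this path can fall back to a read
 *  delegation when a write delegation isn't possible; see the
 *  writedeleg/readonly tests below.)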
*/ if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else if (nfsrv_issuedelegs == 0) *rflagsp |= NFSV4OPEN_WDSUPPFTYPE; else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if (delegate == 0 || (writedeleg == 0 && readonly == 0) || !NFSVNO_DELEGOK(vp) || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } } else { /* * New owner case. Start the open_owner sequence with a * Needs confirmation (unless a reclaim) and hang the * new open off it. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; LIST_INIT(&new_open->ls_open); new_open->ls_openowner = new_stp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); if (new_stp->ls_flags & NFSLCK_RECLAIM) { new_stp->ls_flags = 0; } else if ((nd->nd_flag & ND_NFSV41) != 0) { /* NFSv4.1 never needs confirmation. */ new_stp->ls_flags = 0; /* * This is where we can choose to issue a delegation. 
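 * (NFSv4.1 needs no OpenConfirm, so a delegation can be handed out
 *  on the very first open by a new open_owner; the tests below also
 *  honor the client's NFSv4.1 WANT bits.)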
*/ if (delegate && nfsrv_issuedelegs && (writedeleg || readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0)) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } /* * Since NFSv4.1 never does an OpenConfirm, the first * open state will be acquired here. */ if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } } else { *rflagsp |= NFSV4OPEN_RESULTCONFIRM; new_stp->ls_flags = NFSLCK_NEEDSCONFIRM; } nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; *new_stpp = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { stateidp->seqid = openstp->ls_stateid.seqid; stateidp->other[0] = openstp->ls_stateid.other[0]; stateidp->other[1] = openstp->ls_stateid.other[1]; stateidp->other[2] = openstp->ls_stateid.other[2]; } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (new_open) free(new_open, M_NFSDSTATE); if (new_deleg) free(new_deleg, M_NFSDSTATE); /* * If the NFSv4.1 client just acquired its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Open update. Does the confirm, downgrade and close. */ APPLESTATIC int nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p, int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; struct nfslockfile *lfp; u_int32_t bits; int error = 0, gotstate = 0, len = 0; u_char *clidp = NULL; /* * Check for restart conditions (client and server). 
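 * (nfsrv_checkrestart() compares the boot time embedded in the
 *  clientid/stateid against nfsrvboottime; a mismatch maps to
 *  NFSERR_STALECLIENTID or NFSERR_STALESTATEID, and the grace
 *  check is done there for the ops that need it.)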
*/ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * Sanity check the open. */ if (!error && (!(stp->ls_flags & NFSLCK_OPEN) || (!(new_stp->ls_flags & NFSLCK_CONFIRM) && (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) || ((new_stp->ls_flags & NFSLCK_CONFIRM) && (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))))) error = NFSERR_BADSTATEID; if (!error) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && !(new_stp->ls_flags & NFSLCK_CONFIRM)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; if (!error && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) error = NFSERR_ISDIR; else error = NFSERR_INVAL; } if (error) { /* * If a client tries to confirm an Open with a bad * seqid# and there are no byte range locks or other Opens * on the openowner, just throw it away, so the next use of the * openowner will start a fresh seq#. */ if (error == NFSERR_BADSEQID && (new_stp->ls_flags & NFSLCK_CONFIRM) && nfsrv_nootherstate(stp)) nfsrv_freeopenowner(stp->ls_openowner, 0, p); NFSUNLOCKSTATE(); goto out; } /* * Set the return stateid. */ stateidp->seqid = stp->ls_stateid.seqid + 1; if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; /* * Now, handle the three cases. */ if (new_stp->ls_flags & NFSLCK_CONFIRM) { /* * If the open doesn't need confirmation, it seems to me that * there is a client error, but I'll just log it and keep going? */ if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) printf("Nfsv4d: stray open confirm\n"); stp->ls_openowner->ls_flags = 0; stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } NFSUNLOCKSTATE(); } else if (new_stp->ls_flags & NFSLCK_CLOSE) { lfp = stp->ls_lfp; if (retwriteaccessp != NULL) { if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0) *retwriteaccessp = 1; else *retwriteaccessp = 0; } if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) { /* Get the lf lock */ nfsrv_locklf(lfp); NFSUNLOCKSTATE(); ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate"); NFSVOPUNLOCK(vp); if (nfsrv_freeopen(stp, vp, 1, p) == 0) { NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); } NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); } else { (void) nfsrv_freeopen(stp, NULL, 0, p); NFSUNLOCKSTATE(); } } else { /* * Update the share bits, making sure that the new set are a * subset of the old ones. 
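 * This is a simple subset test via masking: any share bit set in
 * the request but clear in the existing open shows up in
 * (~ls_flags & bits) below and fails the downgrade with
 * NFSERR_INVAL.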
*/ bits = (new_stp->ls_flags & NFSLCK_SHAREBITS); if (~(stp->ls_flags) & bits) { NFSUNLOCKSTATE(); error = NFSERR_INVAL; goto out; } stp->ls_flags = (bits | NFSLCK_OPEN); stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; NFSUNLOCKSTATE(); } /* * If the client just confirmed its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Delegation update. Does the purge and return. */ APPLESTATIC int nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid, nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p, int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; int error = 0; fhandle_t fh; /* * Do a sanity check against the file handle for DelegReturn. */ if (vp) { error = nfsvno_getfh(vp, &fh, p); if (error) goto out; } /* * Check for restart conditions (client and server). */ if (op == NFSV4OP_DELEGRETURN) error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN, stateidp, 0); else error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE, stateidp, 0); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ if (!error) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error) { if (error == NFSERR_CBPATHDOWN) error = 0; if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN) error = NFSERR_STALESTATEID; } if (!error && op == NFSV4OP_DELEGRETURN) { error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp); if (!error && stp->ls_stateid.seqid != stateidp->seqid && ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0)) error = NFSERR_OLDSTATEID; } /* * NFSERR_EXPIRED means that the state has gone away, * so Delegations have been purged. Just return ok. */ if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) { NFSUNLOCKSTATE(); error = 0; goto out; } if (error) { NFSUNLOCKSTATE(); goto out; } if (op == NFSV4OP_DELEGRETURN) { if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh, sizeof (fhandle_t))) { NFSUNLOCKSTATE(); error = NFSERR_BADSTATEID; goto out; } if (retwriteaccessp != NULL) { if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) *retwriteaccessp = 1; else *retwriteaccessp = 0; } nfsrv_freedeleg(stp); } else { nfsrv_freedeleglist(&clp->lc_olddeleg); } NFSUNLOCKSTATE(); error = 0; out: NFSEXITCODE(error); return (error); } /* * Release lock owner. */ APPLESTATIC int nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid, NFSPROC_T *p) { struct nfsstate *stp, *nstp, *openstp, *ownstp; struct nfsclient *clp; int error = 0; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the lock owner by name. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); if (error) { NFSUNLOCKSTATE(); goto out; } LIST_FOREACH(ownstp, &clp->lc_open, ls_list) { LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) { stp = LIST_FIRST(&openstp->ls_open); while (stp != LIST_END(&openstp->ls_open)) { nstp = LIST_NEXT(stp, ls_list); /* * If the owner matches, check for locks and * then free or return an error. 
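 * (An owner can only be released once its lock list is empty;
 *  otherwise NFSERR_LOCKSHELD is returned and nothing is freed.)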
*/ if (stp->ls_ownerlen == new_stp->ls_ownerlen && !NFSBCMP(stp->ls_owner, new_stp->ls_owner, stp->ls_ownerlen)){ if (LIST_EMPTY(&stp->ls_lock)) { nfsrv_freelockowner(stp, NULL, 0, p); } else { NFSUNLOCKSTATE(); error = NFSERR_LOCKSHELD; goto out; } } stp = nstp; } } } NFSUNLOCKSTATE(); out: NFSEXITCODE(error); return (error); } /* * Get the file handle for a lock structure. */ static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p) { fhandle_t *fhp = NULL; int error; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL")); fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfh"); } error = nfsvno_getfh(vp, fhp, p); NFSEXITCODE(error); return (error); } /* * Get an nfs lock structure. Allocate one, as required, and return a * pointer to it. * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock. */ static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit) { struct nfslockfile *lfp; fhandle_t *fhp = NULL, *tfhp; struct nfslockhashhead *hp; struct nfslockfile *new_lfp = NULL; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfile"); } hp = NFSLOCKHASH(fhp); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(fhp, tfhp)) { if (lockit) nfsrv_locklf(lfp); *lfpp = lfp; return (0); } } if (!(flags & NFSLCK_OPEN)) return (-1); /* * No match, so chain the new one into the list. */ LIST_INIT(&new_lfp->lf_open); LIST_INIT(&new_lfp->lf_lock); LIST_INIT(&new_lfp->lf_deleg); LIST_INIT(&new_lfp->lf_locallock); LIST_INIT(&new_lfp->lf_rollback); new_lfp->lf_locallock_lck.nfslock_usecnt = 0; new_lfp->lf_locallock_lck.nfslock_lock = 0; new_lfp->lf_usecount = 0; LIST_INSERT_HEAD(hp, new_lfp, lf_hash); *lfpp = new_lfp; *new_lfpp = NULL; return (0); } /* * This function adds a nfslock lock structure to the list for the associated * nfsstate and nfslockfile structures. It will be inserted after the * entry pointed at by insert_lop. */ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp) { struct nfslock *lop, *nlop; new_lop->lo_stp = stp; new_lop->lo_lfp = lfp; if (stp != NULL) { /* Insert in increasing lo_first order */ lop = LIST_FIRST(&lfp->lf_lock); if (lop == LIST_END(&lfp->lf_lock) || new_lop->lo_first <= lop->lo_first) { LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile); } else { nlop = LIST_NEXT(lop, lo_lckfile); while (nlop != LIST_END(&lfp->lf_lock) && nlop->lo_first < new_lop->lo_first) { lop = nlop; nlop = LIST_NEXT(lop, lo_lckfile); } LIST_INSERT_AFTER(lop, new_lop, lo_lckfile); } } else { new_lop->lo_lckfile.le_prev = NULL; /* list not used */ } /* * Insert after insert_lop, which is overloaded as stp or lfp for * an empty list. 
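 * That is, the caller encodes "insert at the head" by passing the
 * list owner itself:
 *	insert_lop == (struct nfslock *)stp -> head of stp->ls_lock
 *	insert_lop == (struct nfslock *)lfp -> head of lfp->lf_locallock
 * while anything else is a real nfslock to insert after.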
*/ if (stp == NULL && (struct nfslockfile *)insert_lop == lfp) LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner); else if ((struct nfsstate *)insert_lop == stp) LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner); else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. */ static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp) { struct nfslock *new_lop = *new_lopp; struct nfslock *lop, *tlop, *ilop; struct nfslock *other_lop = *other_lopp; int unlock = 0, myfile = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->lo_flags & NFSLCK_UNLOCK) unlock = 1; if (stp != NULL) { ilop = (struct nfslock *)stp; lop = LIST_FIRST(&stp->ls_lock); } else { ilop = (struct nfslock *)lfp; lop = LIST_FIRST(&lfp->lf_locallock); } while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->lo_lfp == lfp) { myfile = 1; if (lop->lo_end >= new_lop->lo_first) { if (new_lop->lo_end < lop->lo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->lo_flags == lop->lo_flags || (new_lop->lo_first <= lop->lo_first && new_lop->lo_end >= lop->lo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (lop->lo_first < new_lop->lo_first) new_lop->lo_first = lop->lo_first; if (lop->lo_end > new_lop->lo_end) new_lop->lo_end = lop->lo_end; tlop = lop; lop = LIST_NEXT(lop, lo_lckowner); nfsrv_freenfslock(tlop); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->lo_first <= lop->lo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ lop->lo_first = new_lop->lo_end; break; } if (new_lop->lo_end >= lop->lo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ lop->lo_end = new_lop->lo_first; ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->lo_flags any longer. 
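 * Pictorially, for an old lock [A,B) split by a new range [C,D)
 * with A < C and D < B:
 *	before:	[A...............B)
 *	after:	[A...C) [C...D) [D...B)
 * where the middle piece is new_lop (unless this is an unlock) and
 * the right-hand piece is carved out into other_lop below.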
*/ tmp = new_lop->lo_first; if (other_lop == NULL) { if (!unlock) panic("nfsd srv update unlock"); other_lop = new_lop; *new_lopp = NULL; } other_lop->lo_first = new_lop->lo_end; other_lop->lo_end = lop->lo_end; other_lop->lo_flags = lop->lo_flags; other_lop->lo_stp = stp; other_lop->lo_lfp = lfp; lop->lo_end = tmp; nfsrv_insertlock(other_lop, lop, stp, lfp); *other_lopp = NULL; ilop = lop; break; } } ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); if (myfile && (lop == NULL || lop->lo_lfp != lfp)) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfsrv_insertlock(new_lop, ilop, stp, lfp); *new_lopp = NULL; } } /* * This function handles sequencing of locks, etc. * It returns an error that indicates what the caller should do. */ static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op) { int error = 0; if ((nd->nd_flag & ND_NFSV41) != 0) /* NFSv4.1 ignores the open_seqid and lock_seqid. */ goto out; if (op != nd->nd_rp) panic("nfsrvstate checkseqid"); if (!(op->rc_flag & RC_INPROG)) panic("nfsrvstate not inprog"); if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) { printf("refcnt=%d\n", stp->ls_op->rc_refcnt); panic("nfsrvstate op refcnt"); } if ((stp->ls_seq + 1) == seqid) { if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); stp->ls_op = op; nfsrvd_refcache(op); stp->ls_seq = seqid; goto out; } else if (stp->ls_seq == seqid && stp->ls_op && op->rc_xid == stp->ls_op->rc_xid && op->rc_refcnt == 0 && op->rc_reqlen == stp->ls_op->rc_reqlen && op->rc_cksum == stp->ls_op->rc_cksum) { if (stp->ls_op->rc_flag & RC_INPROG) { error = NFSERR_DONTREPLY; goto out; } nd->nd_rp = stp->ls_op; nd->nd_rp->rc_flag |= RC_INPROG; nfsrvd_delcache(op); error = NFSERR_REPLYFROMCACHE; goto out; } error = NFSERR_BADSEQID; out: NFSEXITCODE2(error, nd); return (error); } /* * Get the client ip address for callbacks. If the strings can't be parsed, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ APPLESTATIC int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { u_int32_t *tl; u_char *cp, *cp2; int i, j, maxalen = 0, minalen = 0; sa_family_t af; #ifdef INET struct sockaddr_in *rin = NULL, *sin; #endif #ifdef INET6 struct sockaddr_in6 *rin6 = NULL, *sin6; #endif u_char *addr; int error = 0, cantparse = 0; union { in_addr_t ival; u_char cval[4]; } ip; union { in_port_t sval; u_char cval[2]; } port; /* 8 is the maximum length of the port# string. 
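 * (The callback address is a "universal address": the host part in
 *  its usual text form, followed by ".hi.lo" where the port is
 *  hi * 256 + lo. The longest port suffix, ".255.255", is 8
 *  characters; e.g. "10.0.0.1.7.64" means 10.0.0.1 port 1856.)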
*/ addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK); clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; af = AF_UNSPEC; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; #ifdef INET if (!strcmp(addr, "tcp")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; af = AF_INET; } else if (!strcmp(addr, "udp")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; af = AF_INET; } #endif #ifdef INET6 if (af == AF_UNSPEC) { if (!strcmp(addr, "tcp6")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; af = AF_INET6; } else if (!strcmp(addr, "udp6")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; af = AF_INET6; } } #endif if (af == AF_UNSPEC) { cantparse = 1; } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } /* * The caller has allocated clp->lc_req.nr_nam to be large enough * for either AF_INET or AF_INET6 and zeroed out the contents. * maxalen is set to the maximum length of the host IP address string * plus 8 for the maximum length of the port#. * minalen is set to the minimum length of the host IP address string * plus 4 for the minimum length of the port#. * These lengths do not include NULL termination, * so INET[6]_ADDRSTRLEN - 1 is used in the calculations. */ switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); maxalen = INET_ADDRSTRLEN - 1 + 8; minalen = 7 + 4; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); maxalen = INET6_ADDRSTRLEN - 1 + 8; minalen = 3 + 4; break; #endif } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (i == 0) { cantparse = 1; } else if (!cantparse && i <= maxalen && i >= minalen) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* * Parse out the address fields. We expect 6 decimal numbers * separated by '.'s for AF_INET and two decimal numbers * preceeded by '.'s for AF_INET6. */ cp = NULL; switch (af) { #ifdef INET6 /* * For AF_INET6, first parse the host address. */ case AF_INET6: cp = strchr(addr, '.'); if (cp != NULL) { *cp++ = '\0'; if (inet_pton(af, addr, &rin6->sin6_addr) == 1) i = 4; else { cp = NULL; cantparse = 1; } } break; #endif #ifdef INET case AF_INET: cp = addr; i = 0; break; #endif } while (cp != NULL && *cp && i < 6) { cp2 = cp; while (*cp2 && *cp2 != '.') cp2++; if (*cp2) *cp2++ = '\0'; else if (i != 5) { cantparse = 1; break; } j = nfsrv_getipnumber(cp); if (j >= 0) { if (i < 4) ip.cval[3 - i] = j; else port.cval[5 - i] = j; } else { cantparse = 1; break; } cp = cp2; i++; } if (!cantparse) { /* * The host address INADDR_ANY is (mis)used to indicate * "there is no valid callback address". 
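 * (A host address of INADDR_ANY/in6addr_any therefore marks the
 *  parse as failed below, and the cantparse fallback further down
 *  records the client's transport address with port 0 and clears
 *  lc_program, so callbacks, and hence delegations, are never
 *  attempted for such a client.)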
*/ switch (af) { #ifdef INET6 case AF_INET6: if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr, &in6addr_any)) rin6->sin6_port = htons(port.sval); else cantparse = 1; break; #endif #ifdef INET case AF_INET: if (ip.ival != INADDR_ANY) { rin->sin_addr.s_addr = htonl(ip.ival); rin->sin_port = htons(port.sval); } else { cantparse = 1; } break; #endif } } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } if (cantparse) { switch (nd->nd_nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)nd->nd_nam; rin = (struct sockaddr_in *)clp->lc_req.nr_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); rin->sin_addr.s_addr = sin->sin_addr.s_addr; rin->sin_port = 0x0; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nd->nd_nam; rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); rin6->sin6_addr = sin6->sin6_addr; rin6->sin6_port = 0x0; break; #endif } clp->lc_program = 0; } nfsmout: free(addr, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Turn a string of up to three decimal digits into a number. Return -1 upon * error. */ static int nfsrv_getipnumber(u_char *cp) { int i = 0, j = 0; while (*cp) { if (j > 2 || *cp < '0' || *cp > '9') return (-1); i *= 10; i += (*cp - '0'); cp++; j++; } if (i < 256) return (i); return (-1); } /* * This function checks for restart conditions. */ static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid) { int ret = 0; /* * First check for a server restart. Open, LockT, ReleaseLockOwner * and DelegPurge have a clientid, the rest a stateid. */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { if (clientid.lval[0] != nfsrvboottime) { ret = NFSERR_STALECLIENTID; goto out; } } else if (stateidp->other[0] != nfsrvboottime && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; } /* * Read, Write, Setattr and LockT can return NFSERR_GRACE and do * not use a lock/open owner seqid#, so the check can be done now. * (The others will be checked, as required, later.) */ if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST))) goto out; NFSLOCKSTATE(); ret = nfsrv_checkgrace(NULL, NULL, flags); NFSUNLOCKSTATE(); out: NFSEXITCODE(ret); return (ret); } /* * Check for grace. */ static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags) { int error = 0, notreclaimed; struct nfsrv_stable *sp; if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE | NFSNSF_GRACEOVER)) == 0) { /* * First, check to see if all of the clients have done a * ReclaimComplete. If so, grace can end now. */ notreclaimed = 0; LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) { notreclaimed = 1; break; } } if (notreclaimed == 0) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); } if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; } } else { if (!(flags & NFSLCK_RECLAIM)) { error = NFSERR_GRACE; goto out; } if (nd != NULL && clp != NULL && (nd->nd_flag & ND_NFSV41) != 0 && (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) { error = NFSERR_NOGRACE; goto out; } /* * If grace is almost over and we are still getting Reclaims, * extend grace a bit. 
*/ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } out: NFSEXITCODE(error); return (error); } /* * Do a server callback. * The "trunc" argument is slightly overloaded and refers to different * boolean arguments for CBRECALL and CBLAYOUTRECALL. */ static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p) { mbuf_t m; u_int32_t *tl; struct nfsrv_descript *nd; struct ucred *cred; int error = 0; u_int32_t callback; struct nfsdsession *sep = NULL; uint64_t tval; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); cred = newnfs_getcred(); NFSLOCKSTATE(); /* mostly for lc_cbref++ */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); panic("docallb"); } clp->lc_cbref++; /* * Fill the callback program# and version into the request * structure for newnfs_connect() to use. */ clp->lc_req.nr_prog = clp->lc_program; #ifdef notnow if ((clp->lc_flags & LCL_NFSV41) != 0) clp->lc_req.nr_vers = NFSV41_CBVERS; else #endif clp->lc_req.nr_vers = NFSV4_CBVERS; /* * First, fill in some of the fields of nd and cr. */ nd->nd_flag = ND_NFSV4; if (clp->lc_flags & LCL_GSS) nd->nd_flag |= ND_KERBV; if ((clp->lc_flags & LCL_NFSV41) != 0) nd->nd_flag |= ND_NFSV41; if ((clp->lc_flags & LCL_NFSV42) != 0) nd->nd_flag |= ND_NFSV42; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; callback = clp->lc_callback; NFSUNLOCKSTATE(); cred->cr_ngroups = 1; /* * Get the first mbuf for the request. */ MGET(m, M_WAITOK, MT_DATA); mbuf_setlen(m, 0); nd->nd_mreq = nd->nd_mb = m; nd->nd_bpos = NFSMTOD(m, caddr_t); /* * and build the callback request. 
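 * A callback compound is laid out as: tag string, minorversion,
 * callback-ident, numops, then the ops themselves. For NFSv4.1/4.2
 * that is CB_SEQUENCE followed by the real op (numops == 2), while
 * NFSv4.0 carries just the single op; see nfsrv_cbcallargs() below.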
*/ if (procnum == NFSV4OP_CBGETATTR) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR, "CB Getattr", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); (void)nfsrv_putattrbit(nd, attrbitp); } else if (procnum == NFSV4OP_CBRECALL) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL, "CB Recall", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (trunc) *tl = newnfs_true; else *tl = newnfs_false; (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); } else if (procnum == NFSV4OP_CBLAYOUTRECALL) { NFSD_DEBUG(4, "docallback layout recall\n"); nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep); NFSD_DEBUG(4, "aft cbcallargs=%d\n", error); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(laytype); *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY); if (trunc) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE); nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID); tval = 0; txdr_hyper(tval, tl); tl += 2; tval = UINT64_MAX; txdr_hyper(tval, tl); tl += 2; *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); NFSD_DEBUG(4, "aft args\n"); } else if (procnum == NFSV4PROC_CBNULL) { nd->nd_procnum = NFSV4PROC_CBNULL; if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsv4_getcbsession(clp, &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } } } else { error = NFSERR_SERVERFAULT; mbuf_freem(nd->nd_mreq); goto errout; } /* * Call newnfs_connect(), as required, and then newnfs_request(). */ (void) newnfs_sndlock(&clp->lc_req.nr_lock); if (clp->lc_req.nr_client == NULL) { if ((clp->lc_flags & LCL_NFSV41) != 0) { error = ECONNREFUSED; nfsrv_freesession(sep, NULL); } else if (nd->nd_procnum == NFSV4PROC_CBNULL) error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 1); else error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 3); } newnfs_sndunlock(&clp->lc_req.nr_lock); NFSD_DEBUG(4, "aft sndunlock=%d\n", error); if (!error) { if ((nd->nd_flag & ND_NFSV41) != 0) { KASSERT(sep != NULL, ("sep NULL")); if (sep->sess_cbsess.nfsess_xprt != NULL) error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, &sep->sess_cbsess); else { /* * This should probably never occur, but if a * client somehow does an RPC without a * SequenceID Op that causes a callback just * after the nfsd threads have been terminated * and restared we could conceivably get here * without a backchannel xprt. */ printf("nfsrv_docallback: no xprt\n"); error = ECONNREFUSED; } NFSD_DEBUG(4, "aft newnfs_request=%d\n", error); nfsrv_freesession(sep, NULL); } else error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, NULL); } errout: NFSFREECRED(cred); /* * If error is set here, the Callback path isn't working * properly, so twiddle the appropriate LCL_ flags. 
* (nd_repstat != 0 indicates the Callback path is working, * but the callback failed on the client.) */ if (error) { /* * Mark the callback pathway down, which disabled issuing * of delegations and gets Renew to return NFSERR_CBPATHDOWN. */ NFSLOCKSTATE(); clp->lc_flags |= LCL_CBDOWN; NFSUNLOCKSTATE(); } else { /* * Callback worked. If the callback path was down, disable * callbacks, so no more delegations will be issued. (This * is done on the assumption that the callback pathway is * flakey.) */ NFSLOCKSTATE(); if (clp->lc_flags & LCL_CBDOWN) clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON); NFSUNLOCKSTATE(); if (nd->nd_repstat) { error = nd->nd_repstat; NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n", procnum, error); } else if (error == 0 && procnum == NFSV4OP_CBGETATTR) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, NULL); mbuf_freem(nd->nd_mrep); } NFSLOCKSTATE(); clp->lc_cbref--; if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) { clp->lc_flags &= ~LCL_WAKEUPWANTED; wakeup(clp); } NFSUNLOCKSTATE(); free(nd, M_TEMP); NFSEXITCODE(error); return (error); } /* * Set up the compound RPC for the callback. */ static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp) { uint32_t *tl; int error, len; len = strlen(optag); (void)nfsm_strtom(nd, optag, len); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) { if ((nd->nd_flag & ND_NFSV42) != 0) *tl++ = txdr_unsigned(NFSV42_MINORVERSION); else *tl++ = txdr_unsigned(NFSV41_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE); error = nfsv4_setcbsequence(nd, clp, 1, sepp); if (error != 0) return (error); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(op); } else { *tl++ = txdr_unsigned(NFSV4_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(op); } return (0); } /* * Return the next index# for a clientid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around, * it should be rebooted. * At an average rate of one new client per second, it will wrap around in * approximately 136 years. (I think the server will have been shut * down or rebooted before then.) */ static u_int32_t nfsrv_nextclientindex(void) { static u_int32_t client_index = 0; client_index++; if (client_index != 0) return (client_index); printf("%s: out of clientids\n", __func__); return (client_index); } /* * Return the next index# for a stateid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around * (will a BSD server stay up that long?), find * new start and end values. */ static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp) { struct nfsstate *stp; int i; u_int32_t canuse, min_index, max_index; if (!(clp->lc_flags & LCL_INDEXNOTOK)) { clp->lc_stateindex++; if (clp->lc_stateindex != clp->lc_statemaxindex) return (clp->lc_stateindex); } /* * Yuck, we've hit the end. * Look for a new min and max. 
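 * The scan below uses in-use indices at or above 0x80000000 to bound
 * the top of the reusable range and those below it to bound the
 * bottom, so that allocation can restart in the unused gap
 * (min_index + 1 .. max_index).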
 */
    min_index = 0;
    max_index = 0xffffffff;
    for (i = 0; i < nfsrv_statehashsize; i++) {
        LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
            if (stp->ls_stateid.other[2] > 0x80000000) {
                if (stp->ls_stateid.other[2] < max_index)
                    max_index = stp->ls_stateid.other[2];
            } else {
                if (stp->ls_stateid.other[2] > min_index)
                    min_index = stp->ls_stateid.other[2];
            }
        }
    }

    /*
     * Yikes, highly unlikely, but I'll handle it anyhow.
     */
    if (min_index == 0x80000000 && max_index == 0x80000001) {
        canuse = 0;
        /*
         * Loop around until we find an unused entry. Return that
         * and set LCL_INDEXNOTOK, so the search will continue next time.
         * (This is one of those rare cases where a goto is the
         * cleanest way to code the loop.)
         */
tryagain:
        for (i = 0; i < nfsrv_statehashsize; i++) {
            LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
                if (stp->ls_stateid.other[2] == canuse) {
                    canuse++;
                    goto tryagain;
                }
            }
        }
        clp->lc_flags |= LCL_INDEXNOTOK;
        return (canuse);
    }

    /*
     * Ok to start again from min + 1.
     */
    clp->lc_stateindex = min_index + 1;
    clp->lc_statemaxindex = max_index;
    clp->lc_flags &= ~LCL_INDEXNOTOK;
    return (clp->lc_stateindex);
}

/*
 * The following functions handle the stable storage file that deals with
 * the edge conditions described in RFC3530 Sec. 8.6.3.
 * The file is as follows:
 * - a single record at the beginning that has the lease time of the
 *   previous server instance (before the last reboot) and the nfsrvboottime
 *   values for the previous server boots.
 *   These previous boot times are used to ensure that the current
 *   nfsrvboottime does not, somehow, get set to a previous one.
 *   (This is important so that Stale ClientIDs and StateIDs can
 *   be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
 * - followed by some number of appended records with:
 *   - client id string
 *   - flag that indicates it is a record revoking state via lease
 *     expiration or similar
 *     OR has successfully acquired state.
 * These structures vary in length, with the client string at the end, up
 * to NFSV4_OPAQUELIMIT in size.
 *
 * At the end of the grace period, the file is truncated, the first
 * record is rewritten with updated information and any acquired state
 * records for successful reclaims of state are written.
 *
 * Subsequent records are appended when the first state is issued to
 * a client and when state is revoked for a client.
 *
 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological
 * order.
 * If, for some reason, the file can't be read, the grace period is
 * immediately terminated and all reclaims get NFSERR_NOGRACE.
 */

/*
 * Read in the stable storage file. Called by nfssvc() before the nfsd
 * processes start servicing requests.
 */
APPLESTATIC void
nfsrv_setupstable(NFSPROC_T *p)
{
    struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
    struct nfsrv_stable *sp, *nsp;
    struct nfst_rec *tsp;
    int error, i, tryagain;
    off_t off = 0;
    ssize_t aresid, len;

    /*
     * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
     * a reboot, so state has not been lost.
     */
    if (sf->nsf_flags & NFSNSF_UPDATEDONE)
        return;
    /*
     * Set Grace over just until the file reads successfully.
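     * That is, assume no grace can be offered (NFSNSF_GRACEOVER)
     * until the first record and the boot time list have been read
     * back in successfully; only then is NFSNSF_OK set and a real
     * end-of-grace time computed.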
*/ nfsrvboottime = time_second; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; if (sf->nsf_fp == NULL) return; error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid || sf->nsf_numboots == 0 || sf->nsf_numboots > NFSNSF_MAXNUMBOOTS) return; /* * Now, read in the boottimes. */ sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) * sizeof (time_t), M_TEMP, M_WAITOK); off = sizeof (struct nfsf_rec); error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid) { free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } /* * Make sure this nfsrvboottime is different from all recorded * previous ones. */ do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { if (nfsrvboottime == sf->nsf_bootvals[i]) { nfsrvboottime++; tryagain = 1; break; } } } while (tryagain); sf->nsf_flags |= NFSNSF_OK; off += (sf->nsf_numboots * sizeof (time_t)); /* * Read through the file, building a list of records for grace * checking. * Each record is between sizeof (struct nfst_rec) and * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1 * and is actually sizeof (struct nfst_rec) + nst_len - 1. */ tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK); do { error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid; if (error || (len > 0 && (len < sizeof (struct nfst_rec) || len < (sizeof (struct nfst_rec) + tsp->len - 1)))) { /* * Yuck, the file has been corrupted, so just return * after clearing out any restart state, so the grace period * is over. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } free(tsp, M_TEMP); sf->nsf_flags &= ~NFSNSF_OK; free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } if (len > 0) { off += sizeof (struct nfst_rec) + tsp->len - 1; /* * Search the list for a matching client. */ LIST_FOREACH(sp, &sf->nsf_head, nst_list) { if (tsp->len == sp->nst_len && !NFSBCMP(tsp->client, sp->nst_client, tsp->len)) break; } if (sp == LIST_END(&sf->nsf_head)) { sp = (struct nfsrv_stable *)malloc(tsp->len + sizeof (struct nfsrv_stable) - 1, M_TEMP, M_WAITOK); NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec, sizeof (struct nfst_rec) + tsp->len - 1); LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list); } else { if (tsp->flag == NFSNST_REVOKE) sp->nst_flag |= NFSNST_REVOKE; else /* * A subsequent timestamp indicates the client * did a setclientid/confirm and any previous * revoke is no longer relevant. */ sp->nst_flag &= ~NFSNST_REVOKE; } } } while (len > 0); free(tsp, M_TEMP); sf->nsf_flags = NFSNSF_OK; sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease + NFSRV_LEASEDELTA; } /* * Update the stable storage file, now that the grace period is over. 
*/ APPLESTATIC void nfsrv_updatestable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; vnode_t vp; #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000) mount_t mp = NULL; #endif int error; if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE)) return; sf->nsf_flags |= NFSNSF_UPDATEDONE; /* * Ok, we need to rewrite the stable storage file. * - truncate to 0 length * - write the new first structure * - loop through the data structures, writing out any that * have timestamps older than the old boot */ if (sf->nsf_bootvals) { sf->nsf_numboots++; for (i = sf->nsf_numboots - 2; i >= 0; i--) sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i]; } else { sf->nsf_numboots = 1; sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t), M_TEMP, M_WAITOK); } sf->nsf_bootvals[0] = nfsrvboottime; sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); vp = NFSFPVNODE(sf->nsf_fp); vn_start_write(vp, &mp, V_WAIT); if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p, NULL); NFSVOPUNLOCK(vp); } else error = EPERM; vn_finished_write(mp); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0, UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), (off_t)(sizeof (struct nfsf_rec)), UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); return; } sf->nsf_flags |= NFSNSF_OK; /* * Loop through the list and write out timestamp records for * any clients that successfully reclaimed state. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { if (sp->nst_flag & NFSNST_GOTSTATE) { nfsrv_writestable(sp->nst_client, sp->nst_len, NFSNST_NEWSTATE, p); sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE; } LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } nfsrv_backupstable(); } /* * Append a record to the stable storage file. */ APPLESTATIC void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfst_rec *sp; int error; if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL) return; sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + len - 1, M_TEMP, M_WAITOK); sp->len = len; NFSBCOPY(client, sp->client, len); sp->flag = flag; error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp), (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0, UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p); free(sp, M_TEMP); if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); } } /* * This function is called during the grace period to mark a client * that successfully reclaimed state. */ static void nfsrv_markstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just mark it and set the nfsclient back pointer. */ sp->nst_flag |= NFSNST_GOTSTATE; sp->nst_clp = clp; } /* * This function is called when a NFSv4.1 client does a ReclaimComplete. 
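 * (Once every client in the stable storage list has been marked
 *  NFSNST_RECLAIMED, nfsrv_checkgrace() can declare the grace period
 *  over before its full duration has elapsed.)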
 * Very similar to nfsrv_markstable(), except for the flag being set.
 */
static void
nfsrv_markreclaim(struct nfsclient *clp)
{
    struct nfsrv_stable *sp;

    /*
     * First find the client structure.
     */
    LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
        if (sp->nst_len == clp->lc_idlen &&
            !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
            break;
    }
    if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
        return;

    /*
     * Now, just set the flag.
     */
    sp->nst_flag |= NFSNST_RECLAIMED;
}

/*
 * This function is called for a reclaim, to see if it gets grace.
 * It returns 0 if a reclaim is allowed, 1 otherwise.
 */
static int
nfsrv_checkstable(struct nfsclient *clp)
{
    struct nfsrv_stable *sp;

    /*
     * First, find the entry for the client.
     */
    LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
        if (sp->nst_len == clp->lc_idlen &&
            !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
            break;
    }

    /*
     * If it is not in the list, state was revoked or no state was issued
     * since the previous reboot, so the reclaim is denied.
     */
    if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
        (sp->nst_flag & NFSNST_REVOKE) ||
        !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
        return (1);
    return (0);
}

/*
 * Test for and try to clear out a conflicting client. This is called by
 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 * are found.
 * The trick here is that it can't revoke a conflicting client with an
 * expired lease unless it holds the v4root lock, so...
 * If no v4root lock, get the lock and return 1 to indicate "try again".
 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 * the revocation worked and the conflicting client is "bye, bye", so it
 * can be tried again.
 * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
 * Unlocks State before a non-zero value is returned.
 */
static int
nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
    NFSPROC_T *p)
{
    int gotlock, lktype = 0;

    /*
     * If the lease hasn't expired, we can't fix it.
     */
    if (clp->lc_expiry >= NFSD_MONOSEC ||
        !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
        return (0);
    if (*haslockp == 0) {
        NFSUNLOCKSTATE();
        if (vp != NULL) {
            lktype = NFSVOPISLOCKED(vp);
            NFSVOPUNLOCK(vp);
        }
        NFSLOCKV4ROOTMUTEX();
        nfsv4_relref(&nfsv4rootfs_lock);
        do {
            gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
                NFSV4ROOTLOCKMUTEXPTR, NULL);
        } while (!gotlock);
        NFSUNLOCKV4ROOTMUTEX();
        *haslockp = 1;
        if (vp != NULL) {
            NFSVOPLOCK(vp, lktype | LK_RETRY);
            if (VN_IS_DOOMED(vp))
                return (2);
        }
        return (1);
    }
    NFSUNLOCKSTATE();

    /*
     * Ok, we can expire the conflicting client.
     */
    nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
    nfsrv_backupstable();
    nfsrv_cleanclient(clp, p);
    nfsrv_freedeleglist(&clp->lc_deleg);
    nfsrv_freedeleglist(&clp->lc_olddeleg);
    LIST_REMOVE(clp, lc_hash);
    nfsrv_zapclient(clp, p);
    return (1);
}

/*
 * Resolve a delegation conflict.
 * Returns 0 to indicate the conflict was resolved without sleeping.
 * Return -1 to indicate that the caller should check for conflicts again.
 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 *
 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 * for a return of 0, since there was no sleep and it could be required
 * later. It is released for a return of NFSERR_DELAY, since the caller
 * will return that error. It is released when a sleep was done waiting
 * for the delegation to be returned or expire (so that other nfsds can
 * handle ops). Then, it must be acquired for the write to stable storage.
* (This function is somewhat similar to nfsrv_clientconflict(), but * the semantics differ in a couple of subtle ways. The return of 0 * indicates the conflict was resolved without sleeping here, not * that the conflict can't be resolved and the handling of nfsv4root_lock * differs, as noted above.) * Unlocks State before returning a non-zero value. */ static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp) { struct nfsclient *clp = stp->ls_clp; int gotlock, error, lktype = 0, retrycnt, zapped_clp; nfsv4stateid_t tstateid; fhandle_t tfh; /* * If the conflict is with an old delegation... */ if (stp->ls_flags & NFSLCK_OLDDELEG) { /* * You can delete it, if it has expired. */ if (clp->lc_delegtime < NFSD_MONOSEC) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); error = -1; goto out; } NFSUNLOCKSTATE(); /* * During this delay, the old delegation could expire or it * could be recovered by the client via an Open with * CLAIM_DELEGATE_PREV. * Release the nfsv4root_lock, if held. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * It's a current delegation, so: * - check to see if the delegation has expired * - if so, get the v4root lock and then expire it */ if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) { /* * - do a recall callback, since not yet done * For now, never allow truncate to be set. To use * truncate safely, it must be guaranteed that the * Remove, Rename or Setattr with size of 0 will * succeed and that would require major changes to * the VFS/Vnode OPs. * Set the expiry time large enough so that it won't expire * until after the callback, then set it correctly, once * the callback is done. (The delegation will now time * out whether or not the Recall worked ok. The timeout * will be extended when ops are done on the delegation * stateid, up to the timelimit.) */ stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_flags |= NFSLCK_DELEGRECALL; /* * Loop NFSRV_CBRETRYCNT times while the CBRecall replies * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done * in order to try and avoid a race that could happen * when a CBRecall request passed the Open reply with * the delegation in it when transitting the network. * Since nfsrv_docallback will sleep, don't use stp after * the call. */ NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid, sizeof (tstateid)); NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh, sizeof (tfh)); NFSUNLOCKSTATE(); if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } retrycnt = 0; do { error = nfsrv_docallback(clp, NFSV4OP_CBRECALL, &tstateid, 0, &tfh, NULL, NULL, 0, p); retrycnt++; } while ((error == NFSERR_BADSTATEID || error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT); error = NFSERR_DELAY; goto out; } if (clp->lc_expiry >= NFSD_MONOSEC && stp->ls_delegtime >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); /* * A recall has been done, but it has not yet expired. * So, RETURN_DELAY. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * If we don't yet have the lock, just get it and then return, * since we need that before deleting expired state, such as * this delegation. 
* When getting the lock, unlock the vnode, so other nfsds that * are in progress won't get stuck waiting for the vnode lock. */ if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if (VN_IS_DOOMED(vp)) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_PERM; goto out; } } error = -1; goto out; } NFSUNLOCKSTATE(); /* * Ok, we can delete the expired delegation. * First, write the Revoke record to stable storage and then * clear out the conflict. * Since all other nfsd threads are now blocked, we can safely * sleep without the state changing. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); if (clp->lc_expiry < NFSD_MONOSEC) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); zapped_clp = 1; } else { nfsrv_freedeleg(stp); zapped_clp = 0; } if (zapped_clp) nfsrv_zapclient(clp, p); error = -1; out: NFSEXITCODE(error); return (error); } /* * Check for a remove allowed, if remove is set to 1 and get rid of * delegations. */ APPLESTATIC int nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; clp = NULL; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); if (error == 0 && clientid.qval != 0) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (error == -1) error = 0; goto out; } /* * Now, we must Recall any delegations. */ error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero * return. */ if (error == -1) goto tryagain; if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Now, look for a conflicting open share. */ if (remove) { /* * If the entry in the directory was the last reference to the * corresponding filesystem object, the object can be destroyed. */ if (lfp->lf_usecount > 1) LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (stp->ls_flags & NFSLCK_WRITEDENY) { error = NFSERR_FILEOPEN; break; } } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE(error); return (error); } /* * Clear out all delegations for the file referred to by lfp. * May return NFSERR_DELAY, if there will be a delay waiting for * delegations to expire. * Returns -1 to indicate it slept while recalling a delegation. * This function has the side effect of deleting the nfslockfile structure, * if it no longer has associated state and didn't have to sleep. * Unlocks State before a non-zero value is returned.
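* (Conversely, for a return of 0 the state lock remains held by the caller.)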
*/ static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; int ret = 0; stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp) { ret = nfsrv_delegconflict(stp, haslockp, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ goto out; } } stp = nstp; } out: NFSEXITCODE(ret); return (ret); } /* * There are certain operations that, when being done outside of NFSv4, * require that any NFSv4 delegation for the file be recalled. * This function is to be called for those cases: * VOP_RENAME() - When a delegation is being recalled for any reason, * the client may have to do Opens against the server, using the file's * final component name. If the file has been renamed on the server, * that component name will be incorrect and the Open will fail. * VOP_REMOVE() - Theoretically, a client could Open a file after it has * been removed on the server, if there is a delegation issued to * that client for the file. I say "theoretically" since clients * normally do an Access Op before the Open and that Access Op will * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so * they will detect the file's removal in the same manner. (There is * one case where RFC3530 allows a client to do an Open without first * doing an Access Op, which is passage of a check against the ACE * returned with a Write delegation, but current practice is to ignore * the ACE and always do an Access Op.) * Since the functions can only be called with an unlocked vnode, this * can't be done at this time. * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range * locks locally in the client, which are not visible to the server. To * deal with this, issuing of delegations for a vnode must be disabled * and all delegations for the vnode recalled. This is done via the * second function, using the VV_DISABLEDELEG vflag on the vnode. */ APPLESTATIC void nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) { time_t starttime; int error; /* * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. */ if (newnfs_numnfsd == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp)); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired by another thread. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); /* * Now, call nfsrv_checkremove() in a loop while it returns * NFSERR_DELAY. Return upon any other error or when timed out. */ starttime = NFSD_MONOSEC; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(vp, 0, NULL, (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(vp); } else error = EPERM; if (error == NFSERR_DELAY) { if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO) break; /* Sleep for a short period of time */ (void) nfs_catnap(PZERO, 0, "nfsremove"); } } while (error == NFSERR_DELAY); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } APPLESTATIC void nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p) { #ifdef VV_DISABLEDELEG /* * First, flag issuance of delegations disabled. */ atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG); #endif /* * Then call nfsd_recalldelegation() to get rid of all extant * delegations. 
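* (The VV_DISABLEDELEG vflag set above only stops new delegations from being issued; the call below deals with the ones already outstanding.)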
*/ nfsd_recalldelegation(vp, p); } /* * Check for conflicting locks, etc. and then get rid of delegations. * (At one point I thought that I should get rid of delegations for any * Setattr, since it could potentially disallow the I/O op (read or write) * allowed by the delegation. However, Setattr Ops that aren't changing * the size get a stateid of all 0s, so you can't tell if it is a delegation * for the same client or a different one, so I decided to only get rid * of delegations for other clients when the size is being changed.) * In general, a Setattr can disable NFS I/O Ops that are outstanding, such * as Write backs, even if there is no delegation, so it really isn't any * different? */ APPLESTATIC int nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct nfsexstuff *exp, NFSPROC_T *p) { struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; int error = 0; nfsquad_t clientid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_first = nvap->na_size; } else { stp->ls_flags = 0; lop->lo_first = 0; } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL)) stp->ls_flags |= NFSLCK_SETATTR; if (stp->ls_flags == 0) goto out; lop->lo_end = NFS64BITSSET; lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = stateidp->seqid; clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0]; clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1]; stp->ls_stateid.other[2] = stateidp->other[2]; error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, stateidp, exp, nd, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Check for a write delegation and do a CBGETATTR if there is one, updating * the attributes, as required. * Should I return an error if I can't get the attributes? (For now, I'll * just return ok.) */ APPLESTATIC int nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; struct nfsclient *clp; struct nfsvattr nva; fhandle_t nfh; int error = 0; nfsattrbit_t cbbits; u_quad_t delegfilerev; NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits); if (!NFSNONZERO_ATTRBIT(&cbbits)) goto out; if (nfsrv_writedelegcnt == 0) goto out; /* * Get the lock file structure. * (A return of -1 means no associated state, so return ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (error == -1) error = 0; goto out; } /* * Now, look for a write delegation. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_flags & NFSLCK_DELEGWRITE) break; } if (stp == LIST_END(&lfp->lf_deleg)) { NFSUNLOCKSTATE(); goto out; } clp = stp->ls_clp; delegfilerev = stp->ls_filerev; /* * If the Write delegation was issued as a part of this Compound RPC * or if we have an Implied Clientid (used in a previous Op in this * compound) and it is the client the delegation was issued to, * just return ok. * I also assume that it is from the same client iff the network * host IP address is the same as the callback address. (Not * exactly correct by the RFC, but avoids a lot of Getattr * callbacks.)
*/ if (nd->nd_compref == stp->ls_compref || ((nd->nd_flag & ND_IMPLIEDCLID) && clp->lc_clientid.qval == nd->nd_clientid.qval) || nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) { NFSUNLOCKSTATE(); goto out; } /* * We are now done with the delegation state structure, * so the statelock can be released and we can now tsleep(). */ /* * Now, we must do the CB Getattr callback, to see if Change or Size * has changed. */ if (clp->lc_expiry >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); NFSVNO_ATTRINIT(&nva); nva.na_filerev = NFS64BITSSET; error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL, 0, &nfh, &nva, &cbbits, 0, p); if (!error) { if ((nva.na_filerev != NFS64BITSSET && nva.na_filerev > delegfilerev) || (NFSVNO_ISSETSIZE(&nva) && nva.na_size != nvap->na_size)) { error = nfsvno_updfilerev(vp, nvap, nd, p); if (NFSVNO_ISSETSIZE(&nva)) nvap->na_size = nva.na_size; } } else error = 0; /* Ignore callback errors for now. */ } else { NFSUNLOCKSTATE(); } out: NFSEXITCODE2(error, nd); return (error); } /* * This function looks for openowners that haven't had any opens for * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS * is set. */ APPLESTATIC void nfsrv_throwawayopens(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int i; NFSLOCKSTATE(); nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit)) nfsrv_freeopenowner(stp, 0, p); } } } NFSUNLOCKSTATE(); } /* * This function checks to see if the credentials are the same. * Returns 1 for not same, 0 otherwise. */ static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp) { if (nd->nd_flag & ND_GSS) { if (!(clp->lc_flags & LCL_GSS)) return (1); if (clp->lc_flags & LCL_NAME) { if (nd->nd_princlen != clp->lc_namelen || NFSBCMP(nd->nd_principal, clp->lc_name, clp->lc_namelen)) return (1); else return (0); } if (nd->nd_cred->cr_uid == clp->lc_uid) return (0); else return (1); } else if (clp->lc_flags & LCL_GSS) return (1); /* * For AUTH_SYS, allow the same uid or root. (This is underspecified * in RFC3530, which talks about principals, but doesn't say anything * about uids for AUTH_SYS.) */ if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0) return (0); else return (1); } /* * Calculate the lease expiry time. */ static time_t nfsrv_leaseexpiry(void) { if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } /* * Delay the delegation timeout as far as ls_delegtimelimit, as required. */ static void nfsrv_delaydelegtimeout(struct nfsstate *stp) { if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) return; if ((stp->ls_delegtime + 15) > NFSD_MONOSEC && stp->ls_delegtime < stp->ls_delegtimelimit) { stp->ls_delegtime += nfsrv_lease; if (stp->ls_delegtime > stp->ls_delegtimelimit) stp->ls_delegtime = stp->ls_delegtimelimit; } } /* * This function checks to see if there is any other state associated * with the openowner for this Open. * It returns 1 if there is no other state, 0 otherwise. 
*/ static int nfsrv_nootherstate(struct nfsstate *stp) { struct nfsstate *tstp; LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) { if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock)) return (0); } return (1); } /* * Create a list of lock deltas (changes to local byte range locking * that can be rolled back using the list) and apply the changes via * nfsvno_advlock(). Optionally, lock the list. It is expected that either * the rollback or update function will be called after this. * It returns an error (and rolls back, as required), if any nfsvno_advlock() * call fails. If it returns an error, it will unlock the list. */ static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfslock *lop, *nlop; int error = 0; /* Loop through the list of locks. */ lop = LIST_FIRST(&lfp->lf_locallock); while (first < end && lop != NULL) { nlop = LIST_NEXT(lop, lo_lckowner); if (first >= lop->lo_end) { /* not there yet */ lop = nlop; } else if (first < lop->lo_first) { /* new one starts before entry in list */ if (end <= lop->lo_first) { /* no overlap between old and new */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, lop->lo_first, cfp, p); if (error != 0) break; first = lop->lo_first; } } else { /* new one overlaps this entry in list */ if (end <= lop->lo_end) { /* overlaps all of new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, lop->lo_end, cfp, p); if (error != 0) break; first = lop->lo_end; lop = nlop; } } } if (first < end && error == 0) /* handle fragment past end of list */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); NFSEXITCODE(error); return (error); } /* * Local lock unlock. Unlock all byte ranges that are no longer locked * by NFSv4. To do this, unlock any subranges of first-->end that * do not overlap with the byte ranges of any lock in the lfp->lf_lock * list. This list has all locks for the file held by other * <lockowner, clientid> tuples. The list is ordered by increasing * lo_first value, but may have entries that overlap each other, for * the case of read locks. */ static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p) { struct nfslock *lop; uint64_t first, end, prevfirst __unused; first = init_first; end = init_end; while (first < init_end) { /* Loop through all nfs locks, adjusting first and end */ prevfirst = 0; LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { KASSERT(prevfirst <= lop->lo_first, ("nfsv4 locks out of order")); KASSERT(lop->lo_first < lop->lo_end, ("nfsv4 bogus lock")); prevfirst = lop->lo_first; if (first >= lop->lo_first && first < lop->lo_end) /* * Overlaps with initial part, so trim * off that initial part by moving first past * it. */ first = lop->lo_end; else if (end > lop->lo_first && lop->lo_first > first) { /* * This lock defines the end of the * segment to unlock, so set end to the * start of it and break out of the loop.
*/ end = lop->lo_first; break; } if (first >= end) /* * There is no segment left to do, so * break out of this loop and then exit * the outer while() since first will be set * to end, which must equal init_end here. */ break; } if (first < end) { /* Unlock this segment */ (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK, NFSLCK_READ, first, end, NULL, p); nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK, first, end); } /* * Now move past this segment and look for any further * segment in the range, if there is one. */ first = end; end = init_end; } } /* * Do the local lock operation and update the rollback list, as required. * Perform the rollback and return the error if nfsvno_advlock() fails. */ static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfsrollback *rlp; int error = 0, ltype, oldltype; if (flags & NFSLCK_WRITE) ltype = F_WRLCK; else if (flags & NFSLCK_READ) ltype = F_RDLCK; else ltype = F_UNLCK; if (oldflags & NFSLCK_WRITE) oldltype = F_WRLCK; else if (oldflags & NFSLCK_READ) oldltype = F_RDLCK; else oldltype = F_UNLCK; if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK)) /* nothing to do */ goto out; error = nfsvno_advlock(vp, ltype, first, end, p); if (error != 0) { if (cfp != NULL) { cfp->cl_clientid.lval[0] = 0; cfp->cl_clientid.lval[1] = 0; cfp->cl_first = 0; cfp->cl_end = NFS64BITSSET; cfp->cl_flags = NFSLCK_WRITE; cfp->cl_ownerlen = 5; NFSBCOPY("LOCAL", cfp->cl_owner, 5); } nfsrv_locallock_rollback(vp, lfp, p); } else if (ltype != F_UNLCK) { rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK, M_WAITOK); rlp->rlck_first = first; rlp->rlck_end = end; rlp->rlck_type = oldltype; LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list); } out: NFSEXITCODE(error); return (error); } /* * Roll back local lock changes and free up the rollback list. */ static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p) { struct nfsrollback *rlp, *nrlp; LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) { (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first, rlp->rlck_end, p); free(rlp, M_NFSDROLLBACK); } LIST_INIT(&lfp->lf_rollback); } /* * Update local lock list and delete rollback list (ie now committed to the * local locks). Most of the work is done by the internal function. */ static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end) { struct nfsrollback *rlp, *nrlp; struct nfslock *new_lop, *other_lop; new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); if (flags & (NFSLCK_READ | NFSLCK_WRITE)) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); else other_lop = NULL; new_lop->lo_flags = flags; new_lop->lo_first = first; new_lop->lo_end = end; nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp); if (new_lop != NULL) free(new_lop, M_NFSDLOCK); if (other_lop != NULL) free(other_lop, M_NFSDLOCK); /* and get rid of the rollback list */ LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } /* * Lock the struct nfslockfile for local lock updating. */ static void nfsrv_locklf(struct nfslockfile *lfp) { int gotlock; /* lf_usecount ensures *lfp won't be free'd */ lfp->lf_usecount++; do { gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL, NFSSTATEMUTEXPTR, NULL); } while (gotlock == 0); lfp->lf_usecount--; } /* * Unlock the struct nfslockfile after local lock updating. 
*/ static void nfsrv_unlocklf(struct nfslockfile *lfp) { nfsv4_unlock(&lfp->lf_locallock_lck, 0); } /* * Clear out all state for the NFSv4 server. * Must be called by a thread that can sleep when no nfsds are running. */ void nfsrv_throwawayallstate(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfslockfile *lfp, *nlfp; int i; /* * For each client, clean out the state and then free the structure. */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } } /* * Also, free up any remaining lock file structures. */ for (i = 0; i < nfsrv_lockhashsize; i++) { LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) { printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); } } /* And get rid of the deviceid structures and layouts. */ nfsrv_freealllayoutsanddevids(); } /* * Check the sequence# for the session and slot provided as an argument. * Also, renew the lease if the session will return NFS_OK. */ int nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this, uint32_t *sflagsp, NFSPROC_T *p) { struct nfsdsession *sep; struct nfssessionhash *shp; int error; SVCXPRT *savxprt; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); return (NFSERR_BADSESSION); } error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp, sep->sess_slots, NULL, NFSV4_SLOTS - 1); if (error != 0) { NFSUNLOCKSESSION(shp); return (error); } if (cache_this != 0) nd->nd_flag |= ND_SAVEREPLY; /* Renew the lease. */ sep->sess_clp->lc_expiry = nfsrv_leaseexpiry(); nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; /* Save maximum request and reply sizes. */ nd->nd_maxreq = sep->sess_maxreq; nd->nd_maxresp = sep->sess_maxresp; /* * If this session handles the backchannel, save the nd_xprt for this * RPC, since this is the one being used. * RFC-5661 specifies that the fore channel will be implicitly * bound by a Sequence operation. However, since some NFSv4.1 clients * erroneously assumed that the back channel would be implicitly * bound as well, do the implicit binding unless a * BindConnectiontoSession has already been done on the session. */ if (sep->sess_clp->lc_req.nr_client != NULL && sep->sess_cbsess.nfsess_xprt != nd->nd_xprt && (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 && (sep->sess_clp->lc_flags & LCL_DONEBINDCONN) == 0) { NFSD_DEBUG(2, "nfsrv_checksequence: implicit back channel bind\n"); savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = sep->sess_clp->lc_req.nr_client->cl_private; nd->nd_xprt->xp_idletimeout = 0; /* Disable timeout. */ sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); } *sflagsp = 0; if (sep->sess_clp->lc_req.nr_client == NULL) *sflagsp |= NFSV4SEQ_CBPATHDOWN; NFSUNLOCKSESSION(shp); if (error == NFSERR_EXPIRED) { *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED; error = 0; } else if (error == NFSERR_ADMINREVOKED) { *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED; error = 0; } *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1; return (0); } /* * Check/set reclaim complete for this session/clientid. 
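* A non-zero onefs argument indicates a ReclaimComplete with rca_one_fs set, which only applies to a single file system and is simply noted via the LCL_RECLAIMONEFS flag.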
*/ int nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs) { struct nfsdsession *sep; struct nfssessionhash *shp; int error = 0; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } if (onefs != 0) sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS; /* Check to see if reclaim complete has already happened. */ else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) error = NFSERR_COMPLETEALREADY; else { sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE; nfsrv_markreclaim(sep->sess_clp); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Cache the reply in a session slot. */ void nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat, struct mbuf **m) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); printf("nfsrv_cache_session: no session\n"); m_freem(*m); return; } nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m); NFSUNLOCKSESSION(shp); } /* * Search for a session that matches the sessionid. */ static struct nfsdsession * nfsrv_findsession(uint8_t *sessionid) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); LIST_FOREACH(sep, &shp->list, sess_hash) { if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID)) break; } return (sep); } /* * Destroy a session. */ int nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid) { int error, igotlock, samesess; samesess = 0; if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) && (nd->nd_flag & ND_HASSEQUENCE) != 0) { samesess = 1; if ((nd->nd_flag & ND_LASTOP) == 0) return (NFSERR_BADSESSION); } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); error = nfsrv_freesession(NULL, sessionid); if (error == 0 && samesess != 0) nd->nd_flag &= ~ND_HASSEQUENCE; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); return (error); } /* * Bind a connection to a session. * For now, only certain variants are supported, since the current session * structure can only handle a single backchannel entry, which will be * applied to all connections if it is set. */ int nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp) { struct nfssessionhash *shp; struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *savxprt; int error; error = 0; shp = NFSSESSIONHASH(sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep != NULL) { clp = sep->sess_clp; if (*foreaftp == NFSCDFC4_BACK || *foreaftp == NFSCDFC4_BACK_OR_BOTH || *foreaftp == NFSCDFC4_FORE_OR_BOTH) { /* Try to set up a backchannel. */ if (clp->lc_req.nr_client == NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire " "backchannel\n"); clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, sep->sess_cbprogram, NFSV4_CBVERS); } if (clp->lc_req.nr_client != NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: set up " "backchannel\n"); savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client->cl_private; /* Disable idle timeout. 
*/ nd->nd_xprt->xp_idletimeout = 0; sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN; if (*foreaftp == NFSCDFS4_BACK) *foreaftp = NFSCDFS4_BACK; else *foreaftp = NFSCDFS4_BOTH; } else if (*foreaftp != NFSCDFC4_BACK) { NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set " "up backchannel\n"); sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } else { error = NFSERR_NOTSUPP; printf("nfsrv_bindconnsess: Can't add " "backchannel\n"); } } else { NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n"); clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } } else error = NFSERR_BADSESSION; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Free up a session structure. */ static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid) { struct nfssessionhash *shp; int i; NFSLOCKSTATE(); if (sep == NULL) { shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); } else { shp = NFSSESSIONHASH(sep->sess_sessionid); NFSLOCKSESSION(shp); } if (sep != NULL) { sep->sess_refcnt--; if (sep->sess_refcnt > 0) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BACKCHANBUSY); } LIST_REMOVE(sep, sess_hash); LIST_REMOVE(sep, sess_list); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); if (sep == NULL) return (NFSERR_BADSESSION); for (i = 0; i < NFSV4_SLOTS; i++) if (sep->sess_slots[i].nfssl_reply != NULL) m_freem(sep->sess_slots[i].nfssl_reply); if (sep->sess_cbsess.nfsess_xprt != NULL) SVC_RELEASE(sep->sess_cbsess.nfsess_xprt); free(sep, M_NFSDSESSION); return (0); } /* * Free a stateid. * RFC5661 says that it should fail when there are associated opens, locks * or delegations. Since stateids represent opens, I don't see how you can * free an open stateid (it will be free'd when closed), so this function * only works for lock stateids (freeing the lock_owner) or delegations. */ int nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) { /* First, check for a delegation. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } if (stp != NULL) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); return (error); } } /* Not a delegation, try for a lock_owner. */ if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0)) /* Not a lock_owner stateid. */ error = NFSERR_LOCKSHELD; if (error == 0 && !LIST_EMPTY(&stp->ls_lock)) error = NFSERR_LOCKSHELD; if (error == 0) nfsrv_freelockowner(stp, NULL, 0, p); NFSUNLOCKSTATE(); return (error); } /* * Test a stateid. 
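* Returns NFSERR_OLDSTATEID when a non-zero seqid in the stateid is older than the server's current seqid for it.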
*/ int nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && stateidp->seqid != 0 && SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid)) error = NFSERR_OLDSTATEID; NFSUNLOCKSTATE(); return (error); } /* * Generate the xdr for an NFSv4.1 CBSequence Operation. */ static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp) { struct nfsdsession *sep; uint32_t *tl, slotseq = 0; int maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; int error; error = nfsv4_getcbsession(clp, sepp); if (error != 0) return (error); sep = *sepp; (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot, &slotseq, sessionid); KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot")); /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = 0; /* No referring call list, for now. */ nd->nd_flag |= ND_HASSEQUENCE; return (0); } /* * Get a session for the callback. */ static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp) { struct nfsdsession *sep; NFSLOCKSTATE(); LIST_FOREACH(sep, &clp->lc_session, sess_list) { if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) break; } if (sep == NULL) { NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } sep->sess_refcnt++; *sepp = sep; NFSUNLOCKSTATE(); return (0); } /* * Free up all backchannel xprts. This needs to be done when the nfsd threads * exit, since those transports will all be going away. * This is only called after all the nfsd threads are done performing RPCs, * so locking shouldn't be an issue. */ APPLESTATIC void nfsrv_freeallbackchannel_xprts(void) { struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *xprt; int i; for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; if (xprt != NULL) SVC_RELEASE(xprt); } } } } /* * Do a layout commit. Actually just call nfsrv_updatemdsattr(). * I have no idea if the rest of these arguments will ever be useful? */ int nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype, int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len, int hasnewmtime, struct timespec *newmtimep, int reclaim, nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep, uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr na; int error; error = nfsrv_updatemdsattr(vp, &na, p); if (error == 0) { *hasnewsizep = 1; *newsizep = na.na_size; } return (error); } /* * Try and get a layout. 
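* For now, any layout issued covers the entire file and Read/Write layouts are only issued for file systems that are not exported read-only.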
*/ int nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp, int layouttype, int *iomode, uint64_t *offset, uint64_t *len, uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose, int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p) { struct nfslayouthash *lhyp; struct nfslayout *lyp; char *devid; fhandle_t fh, *dsfhp; int error, mirrorcnt; if (nfsrv_devidcnt == 0) return (NFSERR_UNKNLAYOUTTYPE); if (*offset != 0) printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset, (uintmax_t)*len); error = nfsvno_getfh(vp, &fh, p); NFSD_DEBUG(4, "layoutget getfh=%d\n", error); if (error != 0) return (error); /* * For now, all layouts are for entire files. * Only issue Read/Write layouts if requested for a non-readonly fs. */ if (NFSVNO_EXRDONLY(exp)) { if (*iomode == NFSLAYOUTIOMODE_RW) return (NFSERR_LAYOUTTRYLATER); *iomode = NFSLAYOUTIOMODE_READ; } if (*iomode != NFSLAYOUTIOMODE_RW) *iomode = NFSLAYOUTIOMODE_READ; /* * Check to see if a write layout can be issued for this file. * This is used during mirror recovery to avoid RW layouts being * issued for a file while it is being copied to the recovered * mirror. */ if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0) return (NFSERR_LAYOUTTRYLATER); *retonclose = 0; *offset = 0; *len = UINT64_MAX; /* First, see if a layout already exists and return if found. */ lhyp = NFSLAYOUTHASH(&fh); NFSLOCKLAYOUT(lhyp); error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp); NFSD_DEBUG(4, "layoutget findlay=%d\n", error); /* * Not sure if the seqid must be the same, so I won't check it. */ if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] || stateidp->other[1] != lyp->lay_stateid.other[1] || stateidp->other[2] != lyp->lay_stateid.other[2])) { if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(1, "ret bad stateid\n"); return (NFSERR_BADSTATEID); } /* * I believe we get here because there is a race between * the client processing the CBLAYOUTRECALL and the layout * being deleted here on the server. * The client has now done a LayoutGet with a non-layout * stateid, as it would when there is no layout. * As such, free this layout and set error == NFSERR_BADSTATEID * so the code below will create a new layout structure as * would happen if no layout was found. * "lyp" will be set before being used below, but set it NULL * as a safety belt. */ nfsrv_freelayout(&lhyp->list, lyp); lyp = NULL; error = NFSERR_BADSTATEID; } if (error == 0) { if (lyp->lay_layoutlen > maxcnt) { NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(1, "ret layout too small\n"); return (NFSERR_TOOSMALL); } if (*iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags |= NFSLAY_RW; else lyp->lay_flags |= NFSLAY_READ; NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; stateidp->seqid = lyp->lay_stateid.seqid; NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(4, "ret fnd layout\n"); return (0); } NFSUNLOCKLAYOUT(lhyp); /* Find the device id and file handle. 
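* There can be one of each per mirror, so the buffers below are sized for NFSDEV_MAXMIRRORS entries.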
*/ dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid); NFSD_DEBUG(4, "layoutget devandfh=%d\n", error); if (error == 0) { if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { if (NFSX_V4FILELAYOUT > maxcnt) error = NFSERR_TOOSMALL; else lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp, devid, vp->v_mount->mnt_stat.f_fsid); } else { if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt) error = NFSERR_TOOSMALL; else lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt, &fh, dsfhp, devid, vp->v_mount->mnt_stat.f_fsid); } } free(dsfhp, M_TEMP); free(devid, M_TEMP); if (error != 0) return (error); /* * Now, add this layout to the list. */ error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p); NFSD_DEBUG(4, "layoutget addl=%d\n", error); /* * The lyp will be set to NULL by nfsrv_addlayout() if it * linked the new structure into the lists. */ free(lyp, M_NFSDSTATE); return (error); } /* * Generate a File Layout. */ static struct nfslayout * nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs) { uint32_t *tl; struct nfslayout *lyp; uint64_t pattern_offset; lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE, M_WAITOK | M_ZERO); lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES; if (iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags = NFSLAY_RW; else lyp->lay_flags = NFSLAY_READ; NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); lyp->lay_clientid.qval = nd->nd_clientid.qval; lyp->lay_fsid = fs; /* Fill in the xdr for the files layout. */ tl = (uint32_t *)lyp->lay_xdr; NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); /* * Make the stripe size as many 64K blocks as will fit in the stripe * mask. Since there is only one stripe, the stripe size doesn't really * matter, except that the Linux client will only handle an exact * multiple of their PAGE_SIZE (usually 4K). I chose 64K as a value * that should cover most/all arches w.r.t. PAGE_SIZE. */ *tl++ = txdr_unsigned(NFSFLAYUTIL_STRIPE_MASK & ~0xffff); *tl++ = 0; /* 1st stripe index. */ pattern_offset = 0; txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */ *tl++ = txdr_unsigned(1); /* 1 file handle. */ *tl++ = txdr_unsigned(NFSX_V4PNFSFH); NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); lyp->lay_layoutlen = NFSX_V4FILELAYOUT; return (lyp); } #define FLEX_OWNERID "999" #define FLEX_UID0 "0" /* * Generate a Flex File Layout. * The FLEX_OWNERID can be any string of 3 decimal digits. Although this * string goes on the wire, it isn't supposed to be used by the client, * since this server uses tight coupling. * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use * a string of "0". This works around the Linux Flex File Layout driver bug * which uses the synthetic uid/gid strings for the "tightly coupled" case. 
*/ static struct nfslayout * nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs) { uint32_t *tl; struct nfslayout *lyp; uint64_t lenval; int i; lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt), M_NFSDSTATE, M_WAITOK | M_ZERO); lyp->lay_type = NFSLAYOUT_FLEXFILE; if (iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags = NFSLAY_RW; else lyp->lay_flags = NFSLAY_READ; NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); lyp->lay_clientid.qval = nd->nd_clientid.qval; lyp->lay_fsid = fs; lyp->lay_mirrorcnt = mirrorcnt; /* Fill in the xdr for the files layout. */ tl = (uint32_t *)lyp->lay_xdr; lenval = 0; txdr_hyper(lenval, tl); tl += 2; /* Stripe unit. */ *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */ for (i = 0; i < mirrorcnt; i++) { *tl++ = txdr_unsigned(1); /* One stripe. */ NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); devid += NFSX_V4DEVICEID; *tl++ = txdr_unsigned(1); /* Efficiency. */ *tl++ = 0; /* Proxy Stateid. */ *tl++ = 0x55555555; *tl++ = 0x55555555; *tl++ = 0x55555555; *tl++ = txdr_unsigned(1); /* 1 file handle. */ *tl++ = txdr_unsigned(NFSX_V4PNFSFH); NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED); dsfhp++; if (nfsrv_flexlinuxhack != 0) { *tl++ = txdr_unsigned(strlen(FLEX_UID0)); *tl = 0; /* 0 pad string. */ NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); *tl++ = txdr_unsigned(strlen(FLEX_UID0)); *tl = 0; /* 0 pad string. */ NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); } else { *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); } } *tl++ = txdr_unsigned(0); /* ff_flags. */ *tl = txdr_unsigned(60); /* Status interval hint. */ lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt); return (lyp); } /* * Parse and process Flex File errors returned via LayoutReturn. */ static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt, NFSPROC_T *p) { uint32_t *tl; int cnt, errcnt, i, j, opnum, stat; char devid[NFSX_V4DEVICEID]; tl = layp; cnt = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt); for (i = 0; i < cnt; i++) { /* Skip offset, length and stateid for now. */ tl += (4 + NFSX_STATEID / NFSX_UNSIGNED); errcnt = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt); for (j = 0; j < errcnt; j++) { NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); stat = fxdr_unsigned(int, *tl++); opnum = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum, stat); /* * Except for NFSERR_ACCES and NFSERR_STALE errors, * disable the mirror. */ if (stat != NFSERR_ACCES && stat != NFSERR_STALE) nfsrv_delds(devid, p); } } } /* * This function removes all flex file layouts which have a mirror with * a device id that matches the argument. * Called when the DS represented by the device id has failed. */ void nfsrv_flexmirrordel(char *devid, NFSPROC_T *p) { uint32_t *tl; struct nfslayout *lyp, *nlyp; struct nfslayouthash *lhyp; struct nfslayouthead loclyp; int i, j; NFSD_DEBUG(4, "flexmirrordel\n"); /* Move all layouts found onto a local list.
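* The LayoutRecall callbacks are then done after the hash list locks have been released.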
*/ TAILQ_INIT(&loclyp); for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (lyp->lay_type == NFSLAYOUT_FLEXFILE && lyp->lay_mirrorcnt > 1) { NFSD_DEBUG(4, "possible match\n"); tl = lyp->lay_xdr; tl += 3; for (j = 0; j < lyp->lay_mirrorcnt; j++) { tl++; if (NFSBCMP(devid, tl, NFSX_V4DEVICEID) == 0) { /* Found one. */ NFSD_DEBUG(4, "fnd one\n"); TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&loclyp, lyp, lay_list); break; } tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED + NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED + 11 * NFSX_UNSIGNED); } } } NFSUNLOCKLAYOUT(lhyp); } /* Now, try to do a Layout recall for each one found. */ TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) { NFSD_DEBUG(4, "do layout recall\n"); /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, &lyp->lay_fh, lyp, 1, lyp->lay_type, p); nfsrv_freelayout(&loclyp, lyp); } } /* * Do a recall callback to the client for this layout. */ static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p) { struct nfsclient *clp; int error; NFSD_DEBUG(4, "nfsrv_recalllayout\n"); error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error); if (error != 0) { printf("nfsrv_recalllayout: getclient err=%d\n", error); return (error); } if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL, stateidp, changed, fhp, NULL, NULL, laytype, p); /* If lyp != NULL, handle an error return here. */ if (error != 0 && lyp != NULL) { NFSDRECALLLOCK(); /* * Mark it returned, since no layout recall * has been done. * All errors seem to be non-recoverable, although * NFSERR_NOMATCHLAYOUT is a normal event. */ if ((lyp->lay_flags & NFSLAY_RECALL) != 0) { lyp->lay_flags |= NFSLAY_RETURNED; wakeup(lyp); } NFSDRECALLUNLOCK(); if (error != NFSERR_NOMATCHLAYOUT) printf("nfsrv_recalllayout: err=%d\n", error); } } else printf("nfsrv_recalllayout: clp not NFSv4.1\n"); return (error); } /* * Find a layout to recall when we exceed our high water mark. */ void nfsrv_recalloldlayout(NFSPROC_T *p) { struct nfslayouthash *lhyp; struct nfslayout *lyp; nfsquad_t clientid; nfsv4stateid_t stateid; fhandle_t fh; int error, laytype = 0, ret; lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) { if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { lyp->lay_flags |= NFSLAY_CALLB; /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; clientid = lyp->lay_clientid; stateid = lyp->lay_stateid; NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh)); laytype = lyp->lay_type; break; } } NFSUNLOCKLAYOUT(lhyp); if (lyp != NULL) { error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0, laytype, p); if (error != 0 && error != NFSERR_NOMATCHLAYOUT) NFSD_DEBUG(4, "recallold=%d\n", error); if (error != 0) { NFSLOCKLAYOUT(lhyp); /* * Since the hash list was unlocked, we need to * find it again. 
*/ ret = nfsrv_findlayout(&clientid, &fh, laytype, p, &lyp); if (ret == 0 && (lyp->lay_flags & NFSLAY_CALLB) != 0 && lyp->lay_stateid.other[0] == stateid.other[0] && lyp->lay_stateid.other[1] == stateid.other[1] && lyp->lay_stateid.other[2] == stateid.other[2]) { /* * The client no longer knows this layout, so * it can be free'd now. */ if (error == NFSERR_NOMATCHLAYOUT) nfsrv_freelayout(&lhyp->list, lyp); else { /* * Leave it to be tried later by * clearing NFSLAY_CALLB and moving * it to the head of the list, so it * won't be tried again for a while. */ lyp->lay_flags &= ~NFSLAY_CALLB; TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); } } NFSUNLOCKLAYOUT(lhyp); } } } /* * Try and return layout(s). */ int nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp, int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim, int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr na; struct nfslayouthash *lhyp; struct nfslayout *lyp; fhandle_t fh; int error = 0; *fndp = 0; if (kind == NFSV4LAYOUTRET_FILE) { error = nfsvno_getfh(vp, &fh, p); if (error == 0) { error = nfsrv_updatemdsattr(vp, &na, p); if (error != 0) printf("nfsrv_layoutreturn: updatemdsattr" " failed=%d\n", error); } if (error == 0) { if (reclaim == newnfs_true) { error = nfsrv_checkgrace(NULL, NULL, NFSLCK_RECLAIM); if (error != NFSERR_NOGRACE) error = 0; return (error); } lhyp = NFSLAYOUTHASH(&fh); NFSDRECALLLOCK(); NFSLOCKLAYOUT(lhyp); error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp); NFSD_DEBUG(4, "layoutret findlay=%d\n", error); if (error == 0 && stateidp->other[0] == lyp->lay_stateid.other[0] && stateidp->other[1] == lyp->lay_stateid.other[1] && stateidp->other[2] == lyp->lay_stateid.other[2]) { NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d" " %x %x %x laystateid %d %x %x %x" " off=%ju len=%ju flgs=0x%x\n", stateidp->seqid, stateidp->other[0], stateidp->other[1], stateidp->other[2], lyp->lay_stateid.seqid, lyp->lay_stateid.other[0], lyp->lay_stateid.other[1], lyp->lay_stateid.other[2], (uintmax_t)offset, (uintmax_t)len, lyp->lay_flags); if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; stateidp->seqid = lyp->lay_stateid.seqid; if (offset == 0 && len == UINT64_MAX) { if ((iomode & NFSLAYOUTIOMODE_READ) != 0) lyp->lay_flags &= ~NFSLAY_READ; if ((iomode & NFSLAYOUTIOMODE_RW) != 0) lyp->lay_flags &= ~NFSLAY_RW; if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0) nfsrv_freelayout(&lhyp->list, lyp); else *fndp = 1; } else *fndp = 1; } NFSUNLOCKLAYOUT(lhyp); /* Search the nfsrv_recalllist for a match. */ TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && lyp->lay_clientid.qval == nd->nd_clientid.qval && stateidp->other[0] == lyp->lay_stateid.other[0] && stateidp->other[1] == lyp->lay_stateid.other[1] && stateidp->other[2] == lyp->lay_stateid.other[2]) { lyp->lay_flags |= NFSLAY_RETURNED; wakeup(lyp); error = 0; } } NFSDRECALLUNLOCK(); } if (layouttype == NFSLAYOUT_FLEXFILE) nfsrv_flexlayouterr(nd, layp, maxcnt, p); } else if (kind == NFSV4LAYOUTRET_FSID) nfsrv_freelayouts(&nd->nd_clientid, &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode); else if (kind == NFSV4LAYOUTRET_ALL) nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode); else error = NFSERR_INVAL; if (error == -1) error = 0; return (error); } /* * Look for an existing layout. 
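* Returns 0 with *lypp set to the matching layout, or -1 when there is no layout for this file handle, clientid and layout type. It is assumed that the caller holds the lock for the layout hash list.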
*/ static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, NFSPROC_T *p, struct nfslayout **lypp) { struct nfslayouthash *lhyp; struct nfslayout *lyp; int ret; *lypp = NULL; ret = 0; lhyp = NFSLAYOUTHASH(fhp); TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 && lyp->lay_clientid.qval == clientidp->qval && lyp->lay_type == laytype) break; } if (lyp != NULL) *lypp = lyp; else ret = -1; return (ret); } /* * Add the new layout, as required. */ static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p) { struct nfsclient *clp; struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; fhandle_t *fhp; int error; KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0, ("nfsrv_layoutget: no nd_clientid\n")); lyp = *lypp; fhp = &lyp->lay_fh; NFSLOCKSTATE(); error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error != 0) { NFSUNLOCKSTATE(); return (error); } lyp->lay_stateid.seqid = stateidp->seqid = 1; lyp->lay_stateid.other[0] = stateidp->other[0] = clp->lc_clientid.lval[0]; lyp->lay_stateid.other[1] = stateidp->other[1] = clp->lc_clientid.lval[1]; lyp->lay_stateid.other[2] = stateidp->other[2] = nfsrv_nextstateindex(clp); NFSUNLOCKSTATE(); lhyp = NFSLAYOUTHASH(fhp); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) { if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 && nlyp->lay_clientid.qval == nd->nd_clientid.qval) break; } if (nlyp != NULL) { /* A layout already exists, so use it. */ nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)); NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen); *layoutlenp = nlyp->lay_layoutlen; if (++nlyp->lay_stateid.seqid == 0) nlyp->lay_stateid.seqid = 1; stateidp->seqid = nlyp->lay_stateid.seqid; stateidp->other[0] = nlyp->lay_stateid.other[0]; stateidp->other[1] = nlyp->lay_stateid.other[1]; stateidp->other[2] = nlyp->lay_stateid.other[2]; NFSUNLOCKLAYOUT(lhyp); return (0); } /* Insert the new layout in the lists. */ *lypp = NULL; atomic_add_int(&nfsrv_layoutcnt, 1); NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); NFSUNLOCKLAYOUT(lhyp); return (0); } /* * Get the devinfo for a deviceid. */ int nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt, uint32_t *notify, int *devaddrlen, char **devaddr) { struct nfsdevice *ds; if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype != NFSLAYOUT_FLEXFILE) || (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES)) return (NFSERR_UNKNLAYOUTTYPE); /* * Now, search for the device id. Note that the structures won't go * away, but the order changes in the list. As such, the lock only * needs to be held during the search through the list. */ NFSDDSLOCK(); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 && ds->nfsdev_nmp != NULL) break; } NFSDDSUNLOCK(); if (ds == NULL) return (NFSERR_NOENT); /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. 
*/ *devaddrlen = 0; if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { *devaddrlen = ds->nfsdev_fileaddrlen; *devaddr = ds->nfsdev_fileaddr; } else if (layouttype == NFSLAYOUT_FLEXFILE) { *devaddrlen = ds->nfsdev_flexaddrlen; *devaddr = ds->nfsdev_flexaddr; } if (*devaddrlen == 0) return (NFSERR_UNKNLAYOUTTYPE); /* * The XDR overhead is 3 unsigned values: layout_type, * length_of_address and notify bitmap. * If the notify array is changed to not all zeros, the * count of unsigned values must be increased. */ if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED) { *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED; return (NFSERR_TOOSMALL); } return (0); } /* * Free a list of layout state structures. */ static void nfsrv_freelayoutlist(nfsquad_t clientid) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (lyp->lay_clientid.qval == clientid.qval) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } } /* * Free up a layout. */ static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp) { NFSD_DEBUG(4, "Freelayout=%p\n", lyp); atomic_add_int(&nfsrv_layoutcnt, -1); TAILQ_REMOVE(lhp, lyp, lay_list); free(lyp, M_NFSDSTATE); } /* * Free up a device id. */ void nfsrv_freeonedevid(struct nfsdevice *ds) { int i; atomic_add_int(&nfsrv_devidcnt, -1); vrele(ds->nfsdev_dvp); for (i = 0; i < nfsrv_dsdirsize; i++) if (ds->nfsdev_dsdir[i] != NULL) vrele(ds->nfsdev_dsdir[i]); free(ds->nfsdev_fileaddr, M_NFSDSTATE); free(ds->nfsdev_flexaddr, M_NFSDSTATE); free(ds->nfsdev_host, M_NFSDSTATE); free(ds, M_NFSDSTATE); } /* * Free up a device id and its mirrors. */ static void nfsrv_freedevid(struct nfsdevice *ds) { TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); nfsrv_freeonedevid(ds); } /* * Free all layouts and device ids. * Done when the nfsd threads are shut down since there may be a new * modified device id list created when the nfsd is restarted. */ void nfsrv_freealllayoutsanddevids(void) { struct nfsdontlist *mrp, *nmrp; struct nfslayout *lyp, *nlyp; /* Get rid of the deviceid structures. */ nfsrv_freealldevids(); TAILQ_INIT(&nfsrv_devidhead); nfsrv_devidcnt = 0; /* Get rid of all layouts. */ nfsrv_freealllayouts(); /* Get rid of any nfsdontlist entries. */ LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp) free(mrp, M_NFSDSTATE); LIST_INIT(&nfsrv_dontlisthead); nfsrv_dontlistlen = 0; /* Free layouts in the recall list. */ TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp) nfsrv_freelayout(&nfsrv_recalllisthead, lyp); TAILQ_INIT(&nfsrv_recalllisthead); } /* * Free layouts that match the arguments. 
*/ static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (clid->qval != lyp->lay_clientid.qval) continue; if (fs != NULL && (fs->val[0] != lyp->lay_fsid.val[0] || fs->val[1] != lyp->lay_fsid.val[1])) continue; if (laytype != lyp->lay_type) continue; if ((iomode & NFSLAYOUTIOMODE_READ) != 0) lyp->lay_flags &= ~NFSLAY_READ; if ((iomode & NFSLAYOUTIOMODE_RW) != 0) lyp->lay_flags &= ~NFSLAY_RW; if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } } /* * Free all layouts for the argument file. */ void nfsrv_freefilelayouts(fhandle_t *fhp) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; lhyp = NFSLAYOUTHASH(fhp); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } /* * Free all layouts. */ static void nfsrv_freealllayouts(void) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) nfsrv_freelayout(&lhyp->list, lyp); NFSUNLOCKLAYOUT(lhyp); } } /* * Look up the mount path for the DS server. */ static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp) { struct nameidata nd; struct nfsdevice *ds; struct mount *mp; int error, i; char *dsdirpath; size_t dsdirsize; NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp); *dsp = NULL; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dspathp, p); error = namei(&nd); NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); NFSD_DEBUG(4, "dspath not dir\n"); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); NFSD_DEBUG(4, "dspath not an NFS mount\n"); return (ENXIO); } /* * Allocate a DS server structure with the NFS mounted directory * vnode reference counted, so that a non-forced dismount will * fail with EBUSY. * This structure is always linked into the list, even if an error * is being returned. The caller will free the entire list upon * an error return. */ *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t), M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_dvp = nd.ni_vp; ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount); NFSVOPUNLOCK(nd.ni_vp); dsdirsize = strlen(dspathp) + 16; dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK); /* Now, create the DS directory structures. */ for (i = 0; i < nfsrv_dsdirsize; i++) { snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dsdirpath, p); error = namei(&nd); NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error); if (error != 0) break; if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; NFSD_DEBUG(4, "dsdirpath not a VDIR\n"); break; } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = ENXIO; NFSD_DEBUG(4, "dsdirpath not an NFS mount\n"); break; } ds->nfsdev_dsdir[i] = nd.ni_vp; NFSVOPUNLOCK(nd.ni_vp); } free(dsdirpath, M_TEMP); if (strlen(mdspathp) > 0) { /* * This DS stores files for a specific MDS exported file * system.
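* The fsid of that file system is recorded in nfsdev_mdsfsid below.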
*/ NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, mdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "mds lookup=%d\n", error); if (error != 0) goto out; if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; NFSD_DEBUG(4, "mdspath not dir\n"); goto out; } mp = nd.ni_vp->v_mount; if ((mp->mnt_flag & MNT_EXPORTED) == 0) { vput(nd.ni_vp); error = ENXIO; NFSD_DEBUG(4, "mdspath not an exported fs\n"); goto out; } ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid; ds->nfsdev_mdsisset = 1; vput(nd.ni_vp); } out: TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); atomic_add_int(&nfsrv_devidcnt, 1); return (error); } /* * Look up the mount path for the DS server and delete it. */ int nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p) { struct mount *mp; struct nfsmount *nmp; struct nfsdevice *ds; int error; NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp); /* * Search for the path in the mount list. Avoid looking the path * up, since this mount point may be hung, with associated locked * vnodes, etc. * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked * until this completes. * As noted in the man page, this should be done before any forced * dismount on the mount point, but at least the handshake on * NFSMNTP_CANCELRPCS should make it safe. */ error = 0; ds = NULL; nmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 && strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 && mp->mnt_data != NULL) { nmp = VFSTONFS(mp); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); } else { NFSUNLOCKMNT(nmp); nmp = NULL; } break; } } mtx_unlock(&mountlist_mtx); if (nmp != NULL) { ds = nfsrv_deldsnmp(op, nmp, p); NFSD_DEBUG(4, "deldsnmp=%p\n", ds); if (ds != NULL) { nfsrv_killrpcs(nmp); NFSD_DEBUG(4, "aft killrpcs\n"); } else error = ENXIO; NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); NFSUNLOCKMNT(nmp); } else error = EINVAL; return (error); } /* * Search for and remove a DS entry which matches the "nmp" argument. * The nfsdevice structure pointer is returned so that the caller can * free it via nfsrv_freeonedevid(). * For the forced case, do not try to do LayoutRecalls, since the server * must be shut down now anyhow. */ struct nfsdevice * nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p) { struct nfsdevice *fndds; NFSD_DEBUG(4, "deldsdvp\n"); NFSDDSLOCK(); if (op == PNFSDOP_FORCEDELDS) fndds = nfsv4_findmirror(nmp); else fndds = nfsrv_findmirroredds(nmp); if (fndds != NULL) nfsrv_deleteds(fndds); NFSDDSUNLOCK(); if (fndds != NULL) { if (op != PNFSDOP_FORCEDELDS) nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); } return (fndds); } /* * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid. * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount * point. * Also, returns an error instead of the nfsdevice found. */ APPLESTATIC int nfsrv_delds(char *devid, NFSPROC_T *p) { struct nfsdevice *ds, *fndds; struct nfsmount *nmp; int fndmirror; NFSD_DEBUG(4, "delds\n"); /* * Search the DS server list for a match with devid. * Remove the DS entry if found and there is a mirror. 
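/*
 * The NFSMNTP_CANCELRPCS handshake used by nfsrv_deldsserver() above,
 * isolated as a sketch (hypothetical helper names; assumes the kernel
 * definitions in scope here). The flag keeps a forced dismount from
 * racing this code; whoever sets it must clear it and wakeup() waiters.
 */
static int
cancelrpcs_enter(struct nfsmount *nmp)
{

	NFSLOCKMNT(nmp);
	if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
	    NFSMNTP_CANCELRPCS)) != 0) {
		NFSUNLOCKMNT(nmp);
		return (EBUSY);		/* dismount or peer already active */
	}
	nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
	NFSUNLOCKMNT(nmp);
	return (0);
}

static void
cancelrpcs_exit(struct nfsmount *nmp)
{

	NFSLOCKMNT(nmp);
	nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
	wakeup(nmp);
	NFSUNLOCKMNT(nmp);
}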
*/ fndds = NULL; nmp = NULL; fndmirror = 0; NFSDDSLOCK(); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 && ds->nfsdev_nmp != NULL) { NFSD_DEBUG(4, "fnd main ds\n"); fndds = ds; break; } } if (fndds == NULL) { NFSDDSUNLOCK(); return (ENXIO); } if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) fndmirror = 1; else if (fndds->nfsdev_mdsisset != 0) { /* For the fsid is set case, search for a mirror. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds != fndds && ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && ds->nfsdev_mdsfsid.val[0] == fndds->nfsdev_mdsfsid.val[0] && ds->nfsdev_mdsfsid.val[1] == fndds->nfsdev_mdsfsid.val[1]) { fndmirror = 1; break; } } } if (fndmirror != 0) { nmp = fndds->nfsdev_nmp; NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); nfsrv_deleteds(fndds); } else { NFSUNLOCKMNT(nmp); nmp = NULL; } } NFSDDSUNLOCK(); if (nmp != NULL) { nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); NFSUNLOCKMNT(nmp); return (0); } return (ENXIO); } /* * Mark a DS as disabled by setting nfsdev_nmp = NULL. */ static void nfsrv_deleteds(struct nfsdevice *fndds) { NFSD_DEBUG(4, "deleteds: deleting a mirror\n"); fndds->nfsdev_nmp = NULL; if (fndds->nfsdev_mdsisset == 0) nfsrv_faildscnt--; } /* * Fill in the addr structures for the File and Flex File layouts. */ static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) { uint32_t *tl; char *netprot; int addrlen; static uint64_t new_devid = 0; if (strchr(addr, ':') != NULL) netprot = "tcp6"; else netprot = "tcp"; /* Fill in the device id. */ NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time)); new_devid++; NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)], sizeof(new_devid)); /* * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4 * as defined in RFC5661) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + 6 * NFSX_UNSIGNED; NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot); ds->nfsdev_fileaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_fileaddr = (char *)tl; *tl++ = txdr_unsigned(1); /* One stripe with index 0. */ *tl++ = 0; *tl++ = txdr_unsigned(1); /* One multipath list */ *tl++ = txdr_unsigned(1); /* with one entry in it. */ /* The netaddr for this one entry. */ *tl++ = txdr_unsigned(strlen(netprot)); NFSBCOPY(netprot, tl, strlen(netprot)); tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); /* * Fill in the flex file addr (actually the ff_device_addr4 * as defined for Flexible File Layout) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + 14 * NFSX_UNSIGNED; ds->nfsdev_flexaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_flexaddr = (char *)tl; *tl++ = txdr_unsigned(1); /* One multipath entry. */ /* The netaddr for this one entry. */ *tl++ = txdr_unsigned(strlen(netprot)); NFSBCOPY(netprot, tl, strlen(netprot)); tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(2); /* Two NFS Versions. 
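/*
 * For illustration: the 16-byte deviceid built by nfsrv_allocdevid() above
 * is a boot-time stamp followed by a 64-bit counter, so ids stay unique
 * across nfsd restarts. A stand-alone sketch of that layout (the 8-byte
 * split is an assumption for the example; the kernel copies
 * sizeof(nfsdev_time) bytes of the real timestamp):
 */
#include <stdint.h>
#include <string.h>

static void
make_deviceid(char id[16], uint64_t boottime, uint64_t *counter)
{
	uint64_t seq;

	seq = ++(*counter);
	memcpy(id, &boottime, sizeof(boottime));	/* bytes 0..7 */
	memcpy(id + 8, &seq, sizeof(seq));		/* bytes 8..15 */
}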
*/ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ *tl++ = newnfs_true; /* Tightly coupled. */ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ *tl = newnfs_true; /* Tightly coupled. */ ds->nfsdev_hostnamelen = strlen(dnshost); ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE, M_WAITOK); NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1); } /* * Create the device id list. * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument * is misconfigured. */ int nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p) { struct nfsdevice *ds; char *addrp, *dnshostp, *dspathp, *mdspathp; int error, i; addrp = args->addr; dnshostp = args->dnshost; dspathp = args->dspath; mdspathp = args->mdspath; nfsrv_maxpnfsmirror = args->mirrorcnt; if (addrp == NULL || dnshostp == NULL || dspathp == NULL || mdspathp == NULL) return (0); /* * Loop around for each nul-terminated string in args->addr, * args->dnshost, args->dnspath and args->mdspath. */ while (addrp < (args->addr + args->addrlen) && dnshostp < (args->dnshost + args->dnshostlen) && dspathp < (args->dspath + args->dspathlen) && mdspathp < (args->mdspath + args->mdspathlen)) { error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds); if (error != 0) { /* Free all DS servers. */ nfsrv_freealldevids(); nfsrv_devidcnt = 0; return (ENXIO); } nfsrv_allocdevid(ds, addrp, dnshostp); addrp += (strlen(addrp) + 1); dnshostp += (strlen(dnshostp) + 1); dspathp += (strlen(dspathp) + 1); mdspathp += (strlen(mdspathp) + 1); } if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) { /* Free all DS servers. */ nfsrv_freealldevids(); nfsrv_devidcnt = 0; nfsrv_maxpnfsmirror = 1; return (ENXIO); } /* We can fail at most one less DS than the mirror level. */ nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1; /* * Allocate the nfslayout hash table now, since this is a pNFS server. * Make it 1% of the high water mark and at least 100. */ if (nfslayouthash == NULL) { nfsrv_layouthashsize = nfsrv_layouthighwater / 100; if (nfsrv_layouthashsize < 100) nfsrv_layouthashsize = 100; nfslayouthash = mallocarray(nfsrv_layouthashsize, sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_layouthashsize; i++) { mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF); TAILQ_INIT(&nfslayouthash[i].list); } } return (0); } /* * Free all device ids. */ static void nfsrv_freealldevids(void) { struct nfsdevice *ds, *nds; TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds) nfsrv_freedevid(ds); } /* * Check to see if there is a Read/Write Layout plus either: * - A Write Delegation * or * - An Open with Write_access. * Return 1 if this is the case and 0 otherwise. * This function is used by nfsrv_proxyds() to decide if doing a Proxy * Getattr RPC to the Data Server (DS) is necessary. */ #define NFSCLIDVECSIZE 6 APPLESTATIC int nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p) { fhandle_t fh, *tfhp; struct nfsstate *stp; struct nfslayout *lyp; struct nfslayouthash *lhyp; struct nfslockhashhead *hp; struct nfslockfile *lfp; nfsquad_t clid[NFSCLIDVECSIZE]; int clidcnt, ret; ret = nfsvno_getfh(vp, &fh, p); if (ret != 0) return (0); /* First check for a Read/Write Layout. 
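/*
 * nfsrv_createdevids() above walks four buffers, each holding several
 * nul-terminated strings packed back to back. The walk, as a
 * self-contained sketch (hypothetical names):
 */
#include <string.h>

static void
walk_packed_strings(const char *buf, size_t buflen,
    void (*cb)(const char *))
{
	const char *p;

	for (p = buf; p < buf + buflen; p += strlen(p) + 1)
		cb(p);		/* one nul-terminated entry per call */
}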
*/ clidcnt = 0; lhyp = NFSLAYOUTHASH(&fh); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && ((lyp->lay_flags & NFSLAY_RW) != 0 || ((lyp->lay_flags & NFSLAY_READ) != 0 && nfsrv_pnfsatime != 0))) { if (clidcnt < NFSCLIDVECSIZE) clid[clidcnt].qval = lyp->lay_clientid.qval; clidcnt++; } } NFSUNLOCKLAYOUT(lhyp); if (clidcnt == 0) { /* None found, so return 0. */ return (0); } /* Get the nfslockfile for this fh. */ NFSLOCKSTATE(); hp = NFSLOCKHASH(&fh); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(&fh, tfhp)) break; } if (lfp == NULL) { /* None found, so return 0. */ NFSUNLOCKSTATE(); return (0); } /* Now, look for a Write delegation for this clientid. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 && nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) break; } if (stp != NULL) { /* Found one, so return 1. */ NFSUNLOCKSTATE(); return (1); } /* No Write delegation, so look for an Open with Write_access. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0, ("nfsrv_checkdsattr: Non-open in Open list\n")); if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 && nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) break; } NFSUNLOCKSTATE(); if (stp != NULL) return (1); return (0); } /* * Look for a matching clientid in the vector. Return 1 if one might match. */ static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt) { int i; /* If too many for the vector, return 1 since there might be a match. */ if (clidcnt > NFSCLIDVECSIZE) return (1); for (i = 0; i < clidcnt; i++) if (clidvec[i].qval == clid.qval) return (1); return (0); } /* * Check the don't list for "vp" and see if issuing an rw layout is allowed. * Return 1 if issuing an rw layout isn't allowed, 0 otherwise. */ static int nfsrv_dontlayout(fhandle_t *fhp) { struct nfsdontlist *mrp; int ret; if (nfsrv_dontlistlen == 0) return (0); ret = 0; NFSDDONTLISTLOCK(); LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 && (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) { ret = 1; break; } } NFSDDONTLISTUNLOCK(); return (ret); } #define PNFSDS_COPYSIZ 65536 /* * Create a new file on a DS and copy the contents of an extant DS file to it. * This can be used for recovery of a DS file onto a recovered DS. * The steps are: * - When called, the MDS file's vnode is locked, blocking LayoutGet operations. * - Disable issuing of read/write layouts for the file via the nfsdontlist, * so that they will be disabled after the MDS file's vnode is unlocked. * - Set up the nfsrv_recalllist so that recall of read/write layouts can * be done. * - Unlock the MDS file's vnode, so that the client(s) can perform proxied * writes, LayoutCommits and LayoutReturns for the file when completing the * LayoutReturn requested by the LayoutRecall callback. * - Issue a LayoutRecall callback for all read/write layouts and wait for * them to be returned. (If the LayoutRecall callback replies * NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.) * - Exclusively lock the MDS file's vnode. This ensures that no proxied * writes are in progress or can occur during the DS file copy. * It also blocks Setattr operations. * - Create the file on the recovered mirror. * - Copy the file from the operational DS. * - Copy any ACL from the MDS file to the new DS file. * - Set the modify time of the new DS file to that of the MDS file. 
* - Update the extended attribute for the MDS file. * - Enable issuing of rw layouts by deleting the nfsdontlist entry. * - The caller will unlock the MDS file's vnode allowing operations * to continue normally, since it is now on the mirror again. */ int nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds, struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt, struct ucred *cred, NFSPROC_T *p) { struct nfsdontlist *mrp, *nmrp; struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; struct nfslayouthead thl; struct mount *mp, *tvmp; struct acl *aclp; struct vattr va; struct timespec mtime; fhandle_t fh; vnode_t tvp; off_t rdpos, wrpos; ssize_t aresid; char *dat; int didprintf, ret, retacl, xfer; ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp"); ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp"); /* * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag * so that no more RW layouts will get issued. */ ret = nfsvno_getfh(vp, &fh, p); if (ret != 0) { NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret); return (ret); } nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK); nmrp->nfsmr_flags = NFSMR_DONTLAYOUT; NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh)); NFSDDONTLISTLOCK(); LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0) break; } if (mrp == NULL) { LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list); mrp = nmrp; nmrp = NULL; nfsrv_dontlistlen++; NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n"); } else { NFSDDONTLISTUNLOCK(); free(nmrp, M_NFSDSTATE); NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n"); return (ENXIO); } NFSDDONTLISTUNLOCK(); /* * Search for all RW layouts for this file. Move them to the * recall list, so they can be recalled and their return noted. */ lhyp = NFSLAYOUTHASH(&fh); NFSDRECALLLOCK(); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && (lyp->lay_flags & NFSLAY_RW) != 0) { TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list); lyp->lay_trycnt = 0; } } NFSUNLOCKLAYOUT(lhyp); NFSDRECALLUNLOCK(); ret = 0; mp = tvmp = NULL; didprintf = 0; TAILQ_INIT(&thl); /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */ NFSVOPUNLOCK(vp); /* Now, do a recall for all layouts not yet recalled. */ tryagain: NFSDRECALLLOCK(); TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && (lyp->lay_flags & NFSLAY_RECALL) == 0) { lyp->lay_flags |= NFSLAY_RECALL; /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; NFSDRECALLUNLOCK(); nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, &lyp->lay_fh, lyp, 0, lyp->lay_type, p); NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n"); goto tryagain; } } /* Now wait for them to be returned. */ tryagain2: TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) { if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) { TAILQ_REMOVE(&nfsrv_recalllisthead, lyp, lay_list); TAILQ_INSERT_HEAD(&thl, lyp, lay_list); NFSD_DEBUG(4, "nfsrv_copymr: layout returned\n"); } else { lyp->lay_trycnt++; ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR, PVFS | PCATCH, "nfsmrl", hz); NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n", ret); if (ret == EINTR || ret == ERESTART) break; if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) { /* * Give up after 60sec and return * ENXIO, failing the copymr. 
* This layout will remain on the * recalllist. It can only be cleared * by restarting the nfsd. * This seems the safe way to handle * it, since it cannot be safely copied * with an outstanding RW layout. */ if (lyp->lay_trycnt >= 60) { ret = ENXIO; break; } if (didprintf == 0) { printf("nfsrv_copymr: layout " "not returned\n"); didprintf = 1; } } } goto tryagain2; } } NFSDRECALLUNLOCK(); /* We can now get rid of the layouts that have been returned. */ TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp) nfsrv_freelayout(&thl, lyp); /* * Do the vn_start_write() calls here, before the MDS vnode is * locked and the tvp is created (locked) in the NFS file system * that dvp is in. * For tvmp, this probably isn't necessary, since it will be an * NFS mount and they are not suspendable at this time. */ if (ret == 0) ret = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (ret == 0) { tvmp = dvp->v_mount; ret = vn_start_write(NULL, &tvmp, V_WAIT | PCATCH); } /* * LK_EXCLUSIVE lock the MDS vnode, so that any * proxied writes through the MDS will be blocked until we have * completed the copy and update of the extended attributes. * This will also ensure that any attributes and ACL will not be * changed until the copy is complete. */ NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (ret == 0 && VN_IS_DOOMED(vp)) { NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n"); ret = ESTALE; } /* Create the data file on the recovered DS. */ if (ret == 0) ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp); /* Copy the DS file, if created successfully. */ if (ret == 0) { /* * Get any NFSv4 ACL on the MDS file, so that it can be set * on the new DS file. */ aclp = acl_alloc(M_WAITOK | M_ZERO); retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); if (retacl != 0 && retacl != ENOATTR) NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl); dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK); /* Malloc a block of 0s used to check for holes. */ if (nfsrv_zeropnfsdat == NULL) nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK | M_ZERO); rdpos = wrpos = 0; ret = VOP_GETATTR(fvp, &va, cred); aresid = 0; while (ret == 0 && aresid == 0) { ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ, rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, &aresid, p); xfer = PNFSDS_COPYSIZ - aresid; if (ret == 0 && xfer > 0) { rdpos += xfer; /* * Skip the write for holes, except for the * last block. */ if (xfer < PNFSDS_COPYSIZ || rdpos == va.va_size || NFSBCMP(dat, nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0) ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer, wrpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, NULL, p); if (ret == 0) wrpos += xfer; } } /* If there is an ACL and the copy succeeded, set the ACL. */ if (ret == 0 && retacl == 0) { ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p); /* * Don't consider these as errors, since VOP_GETACL() * can return an ACL when they are not actually * supported. For example, for UFS, VOP_GETACL() * will return a trivial ACL based on the uid/gid/mode * when there is no ACL on the file. * This case should be recognized as a trivial ACL * by UFS's VOP_SETACL() and succeed, but... */ if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM) ret = 0; } if (ret == 0) ret = VOP_FSYNC(tvp, MNT_WAIT, p); /* Set the DS data file's modify time that of the MDS file. 
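/*
 * The copy loop above preserves sparseness: each 64KB block is compared
 * with a preallocated block of zeros and the write is skipped for holes.
 * The same idea in stand-alone userland form (hypothetical helper; "zeros"
 * points at COPYSIZ bytes of zero; error handling elided):
 */
#include <sys/types.h>
#include <string.h>
#include <unistd.h>

#define	COPYSIZ	65536			/* mirrors PNFSDS_COPYSIZ */

static void
copy_sparse(int rfd, int wfd, const char *zeros)
{
	char buf[COPYSIZ];
	ssize_t n;
	off_t off = 0;

	while ((n = read(rfd, buf, sizeof(buf))) > 0) {
		/* Assume a block of zeros is a hole and skip the write. */
		if (memcmp(buf, zeros, n) != 0)
			(void)pwrite(wfd, buf, n, off);
		off += n;
	}
	(void)ftruncate(wfd, off);	/* make a trailing hole count */
}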
*/ if (ret == 0) ret = VOP_GETATTR(vp, &va, cred); if (ret == 0) { mtime = va.va_mtime; VATTR_NULL(&va); va.va_mtime = mtime; ret = VOP_SETATTR(tvp, &va, cred); } vput(tvp); acl_free(aclp); free(dat, M_TEMP); } if (tvmp != NULL) vn_finished_write(tvmp); /* Update the extended attributes for the newly created DS file. */ if (ret == 0) ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*wpf) * mirrorcnt, (char *)wpf, p); if (mp != NULL) vn_finished_write(mp); /* Get rid of the dontlist entry, so that Layouts can be issued. */ NFSDDONTLISTLOCK(); LIST_REMOVE(mrp, nfsmr_list); NFSDDONTLISTUNLOCK(); free(mrp, M_NFSDSTATE); return (ret); } /* * Create a data storage file on the recovered DS. */ static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, vnode_t *tvpp) { struct vattr va, nva; int error; /* Make data file name based on FH. */ error = VOP_GETATTR(vp, &va, cred); if (error == 0) { /* Set the attributes for "vp" to Setattr the DS vp. */ VATTR_NULL(&nva); nva.va_uid = va.va_uid; nva.va_gid = va.va_gid; nva.va_mode = va.va_mode; nva.va_size = 0; VATTR_NULL(&va); va.va_type = VREG; va.va_mode = nva.va_mode; NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf); error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL, pf->dsf_filename, cred, p, tvpp); } return (error); } /* * Look up the MDS file shared locked, and then get the extended attribute * to find the extant DS file to be copied to the new mirror. * If successful, *vpp is set to the MDS file's vp and *nvpp is * set to a DS data file for the MDS file, both exclusively locked. * The "buf" argument has the pnfsdsfile structure from the MDS file * in it and buflen is set to its length. */ int nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf, int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp, struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp, struct nfsdevice **fdsp) { struct nameidata nd; struct vnode *vp, *curvp; struct pnfsdsfile *pf; struct nfsmount *nmp, *curnmp; int dsdir, error, mirrorcnt, ippos; vp = NULL; curvp = NULL; curnmp = NULL; *dsp = NULL; *fdsp = NULL; if (dspathp == NULL && curdspathp != NULL) return (EPERM); /* * Look up the MDS file shared locked. The lock will be upgraded * to an exclusive lock after any rw layouts have been returned. */ NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, mdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); if (nd.ni_vp->v_type != VREG) { vput(nd.ni_vp); NFSD_DEBUG(4, "mdspath not reg\n"); return (EISDIR); } vp = nd.ni_vp; if (curdspathp != NULL) { /* * Look up the current DS path and find the nfsdev structure for * it. */ NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, curdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "ds lookup=%d\n", error); if (error != 0) { vput(vp); return (error); } if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "curdspath not dir\n"); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "curdspath not an NFS mount\n"); return (ENXIO); } curnmp = VFSTONFS(nd.ni_vp->v_mount); /* Search the nfsdev list for a match. 
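/*
 * nfsrv_createdsfile() above uses the standard VATTR_NULL() idiom: only
 * the fields explicitly filled in are applied by VOP_SETATTR(); fields
 * left at VNOVAL are ignored. Sketch of the ownership/mode propagation
 * (hypothetical helper name, kernel environment assumed):
 */
static void
clone_ownership(const struct vattr *src, struct vattr *dst)
{

	VATTR_NULL(dst);		/* every field starts "unchanged" */
	dst->va_uid = src->va_uid;
	dst->va_gid = src->va_gid;
	dst->va_mode = src->va_mode;
	dst->va_size = 0;		/* the new DS data file starts empty */
}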
*/ NFSDDSLOCK(); *fdsp = nfsv4_findmirror(curnmp); NFSDDSUNLOCK(); if (*fdsp == NULL) curnmp = NULL; if (curnmp == NULL) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "mdscopymr: no current ds\n"); return (ENXIO); } curvp = nd.ni_vp; } if (dspathp != NULL) { /* Look up the nfsdev path and find the nfsdev structure. */ NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dspathp, p); error = namei(&nd); NFSD_DEBUG(4, "ds lookup=%d\n", error); if (error != 0) { vput(vp); if (curvp != NULL) vput(curvp); return (error); } if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "dspath not dir\n"); if (nd.ni_vp == curvp) return (EPERM); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "dspath not an NFS mount\n"); return (ENXIO); } nmp = VFSTONFS(nd.ni_vp->v_mount); /* * Search the nfsdevice list for a match. If curnmp == NULL, * this is a recovery and there must be a mirror. */ NFSDDSLOCK(); if (curnmp == NULL) *dsp = nfsrv_findmirroredds(nmp); else *dsp = nfsv4_findmirror(nmp); NFSDDSUNLOCK(); if (*dsp == NULL) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "mdscopymr: no ds\n"); return (ENXIO); } } else { nd.ni_vp = NULL; nmp = NULL; } /* * Get a vp for an available DS data file using the extended * attribute on the MDS file. * If there is a valid entry for the new DS in the extended attribute * on the MDS file (as checked via the nmp argument), * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur. */ error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p, NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir); if (curvp != NULL) vput(curvp); if (nd.ni_vp == NULL) { if (error == 0 && nmp != NULL) { /* Search the nfsdev list for a match. */ NFSDDSLOCK(); *dsp = nfsrv_findmirroredds(nmp); NFSDDSUNLOCK(); } if (error == 0 && (nmp == NULL || *dsp == NULL)) { if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } error = ENXIO; } } else vput(nd.ni_vp); /* * When dspathp != NULL and curdspathp == NULL, this is a recovery * and is only allowed if there is a 0.0.0.0 IP address entry. * When curdspathp != NULL, the ippos will be set to that entry. */ if (error == 0 && dspathp != NULL && ippos == -1) { if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } error = ENXIO; } if (error == 0) { *vpp = vp; pf = (struct pnfsdsfile *)buf; if (ippos == -1) { /* If no zeroip pnfsdsfile, add one. */ ippos = *buflenp / sizeof(*pf); *buflenp += sizeof(*pf); pf += ippos; pf->dsf_dir = dsdir; strlcpy(pf->dsf_filename, fname, sizeof(pf->dsf_filename)); } else pf += ippos; *pfp = pf; } else vput(vp); return (error); } /* * Search for a matching pnfsd mirror device structure, base on the nmp arg. * Return one if found, NULL otherwise. */ static struct nfsdevice * nfsrv_findmirroredds(struct nfsmount *nmp) { struct nfsdevice *ds, *fndds; int fndmirror; mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); /* * Search the DS server list for a match with nmp. * Remove the DS entry if found and there is a mirror. 
*/ fndds = NULL; fndmirror = 0; if (nfsrv_devidcnt == 0) return (fndds); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp == nmp) { NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n"); fndds = ds; break; } } if (fndds == NULL) return (fndds); if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) fndmirror = 1; else if (fndds->nfsdev_mdsisset != 0) { /* For the fsid is set case, search for a mirror. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds != fndds && ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && ds->nfsdev_mdsfsid.val[0] == fndds->nfsdev_mdsfsid.val[0] && ds->nfsdev_mdsfsid.val[1] == fndds->nfsdev_mdsfsid.val[1]) { fndmirror = 1; break; } } } if (fndmirror == 0) { NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n"); return (NULL); } return (fndds); } Index: projects/clang1000-import/sys/geom/geom_subr.c =================================================================== --- projects/clang1000-import/sys/geom/geom_subr.c (revision 357178) +++ projects/clang1000-import/sys/geom/geom_subr.c (revision 357179) @@ -1,1658 +1,1661 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifdef KDB #include #endif SDT_PROVIDER_DEFINE(geom); struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; struct g_hh00 { struct g_class *mp; struct g_provider *pp; off_t size; int error; int post; }; void g_dbg_printf(const char *classname, int lvl, struct bio *bp, const char *format, ...) { #ifndef PRINTF_BUFR_SIZE #define PRINTF_BUFR_SIZE 64 #endif char bufr[PRINTF_BUFR_SIZE]; struct sbuf sb, *sbp __unused; va_list ap; sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN); KASSERT(sbp != NULL, ("sbuf_new misused?")); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_cat(&sb, classname); if (lvl >= 0) sbuf_printf(&sb, "[%d]", lvl); va_start(ap, format); sbuf_vprintf(&sb, format, ap); va_end(ap); if (bp != NULL) { sbuf_putc(&sb, ' '); g_format_bio(&sb, bp); } /* Terminate the debug line with a single '\n'. */ sbuf_nl_terminate(&sb); /* Flush line to printf. */ sbuf_finish(&sb); sbuf_delete(&sb); } /* * This event offers a new class a chance to taste all preexisting providers. */ static void g_load_class(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp2, *mp; struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? */ return; if (g_shutdown) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name); KASSERT(mp->name != NULL && *mp->name != '\0', ("GEOM class has no name")); LIST_FOREACH(mp2, &g_classes, class) { if (mp2 == mp) { printf("The GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } else if (strcmp(mp2->name, mp->name) == 0) { printf("A GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } } LIST_INIT(&mp->geom); LIST_INSERT_HEAD(&g_classes, mp, class); if (mp->init != NULL) mp->init(mp); if (mp->taste == NULL) return; LIST_FOREACH(mp2, &g_classes, class) { if (mp == mp2) continue; LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { mp->taste(mp, pp, 0); g_topology_assert(); } } } } static int g_unload_class(struct g_class *mp) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; int error; g_topology_lock(); g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name); retry: G_VALID_CLASS(mp); LIST_FOREACH(gp, &mp->geom, geom) { /* We refuse to unload if anything is open */ LIST_FOREACH(pp, &gp->provider, provider) if (pp->acr || pp->acw || pp->ace) { g_topology_unlock(); return (EBUSY); } LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) { g_topology_unlock(); return (EBUSY); } /* If the geom is withering, wait for it to finish. */ if (gp->flags & G_GEOM_WITHER) { g_topology_sleep(mp, 1); goto retry; } } /* * We allow unloading if we have no geoms, or a class * method we can use to get rid of them. 
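/*
 * For context, the shape of a class that g_load_class()/g_unload_class()
 * above operate on, as a sketch. Names prefixed "example" are
 * hypothetical; .version must be G_VERSION or g_modevent() rejects the
 * module, and a class that can own geoms needs .destroy_geom to be
 * unloadable (see the check below).
 */
static struct g_class g_example_class = {
	.name = "EXAMPLE",
	.version = G_VERSION,
	/* .taste = g_example_taste, lets the class probe providers */
	/* .destroy_geom = g_example_destroy_geom, enables unload */
};

/* Registers the class and routes MOD_LOAD/MOD_UNLOAD to g_modevent(). */
DECLARE_GEOM_CLASS(g_example_class, g_example);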
*/ if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) { g_topology_unlock(); return (EOPNOTSUPP); } /* Bar new entries */ mp->taste = NULL; mp->config = NULL; LIST_FOREACH(gp, &mp->geom, geom) { error = mp->destroy_geom(NULL, mp, gp); if (error != 0) { g_topology_unlock(); return (error); } } /* Wait for withering to finish. */ for (;;) { gp = LIST_FIRST(&mp->geom); if (gp == NULL) break; KASSERT(gp->flags & G_GEOM_WITHER, ("Non-withering geom in class %s", mp->name)); g_topology_sleep(mp, 1); } G_VALID_CLASS(mp); if (mp->fini != NULL) mp->fini(mp); LIST_REMOVE(mp, class); g_topology_unlock(); return (0); } int g_modevent(module_t mod, int type, void *data) { struct g_hh00 *hh; int error; static int g_ignition; struct g_class *mp; mp = data; if (mp->version != G_VERSION) { printf("GEOM class %s has Wrong version %x\n", mp->name, mp->version); return (EINVAL); } if (!g_ignition) { g_ignition++; g_init(); } error = EOPNOTSUPP; switch (type) { case MOD_LOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; /* * Once the system is not cold, MOD_LOAD calls will be * from the userland and the g_event thread will be able * to acknowledge their completion. */ if (cold) { hh->post = 1; error = g_post_event(g_load_class, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_load_class, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } break; case MOD_UNLOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name); error = g_unload_class(mp); if (error == 0) { KASSERT(LIST_EMPTY(&mp->geom), ("Unloaded class (%s) still has geom", mp->name)); } break; } return (error); } static void g_retaste_event(void *arg, int flag) { struct g_class *mp, *mp2; struct g_geom *gp; struct g_hh00 *hh; struct g_provider *pp; struct g_consumer *cp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? */ return; if (g_shutdown || g_notaste) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name); LIST_FOREACH(mp2, &g_classes, class) { LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (pp->acr || pp->acw || pp->ace) continue; LIST_FOREACH(cp, &pp->consumers, consumers) { if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; } if (cp != NULL) { cp->flags |= G_CF_ORPHAN; g_wither_geom(cp->geom, ENXIO); } mp->taste(mp, pp, 0); g_topology_assert(); } } } } int g_retaste(struct g_class *mp) { struct g_hh00 *hh; int error; if (mp->taste == NULL) return (EINVAL); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; if (cold) { hh->post = 1; error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } return (error); } struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) 
{ struct g_geom *gp; va_list ap; struct sbuf *sb; g_topology_assert(); G_VALID_CLASS(mp); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO); gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO); gp->class = mp; gp->rank = 1; LIST_INIT(&gp->consumer); LIST_INIT(&gp->provider); LIST_INIT(&gp->aliases); LIST_INSERT_HEAD(&mp->geom, gp, geom); TAILQ_INSERT_HEAD(&geoms, gp, geoms); strcpy(gp->name, sbuf_data(sb)); sbuf_delete(sb); /* Fill in defaults from class */ gp->start = mp->start; gp->spoiled = mp->spoiled; gp->attrchanged = mp->attrchanged; gp->providergone = mp->providergone; gp->dumpconf = mp->dumpconf; gp->access = mp->access; gp->orphan = mp->orphan; gp->ioctl = mp->ioctl; gp->resize = mp->resize; return (gp); } void g_destroy_geom(struct g_geom *gp) { struct g_geom_alias *gap, *gaptmp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name); KASSERT(LIST_EMPTY(&gp->consumer), ("g_destroy_geom(%s) with consumer(s) [%p]", gp->name, LIST_FIRST(&gp->consumer))); KASSERT(LIST_EMPTY(&gp->provider), ("g_destroy_geom(%s) with provider(s) [%p]", gp->name, LIST_FIRST(&gp->provider))); g_cancel_event(gp); LIST_REMOVE(gp, geom); TAILQ_REMOVE(&geoms, gp, geoms); LIST_FOREACH_SAFE(gap, &gp->aliases, ga_next, gaptmp) g_free(gap); g_free(gp->name); g_free(gp); } /* * This function is called (repeatedly) until the geom has withered away. */ void g_wither_geom(struct g_geom *gp, int error) { struct g_provider *pp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name); if (!(gp->flags & G_GEOM_WITHER)) { gp->flags |= G_GEOM_WITHER; LIST_FOREACH(pp, &gp->provider, provider) if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } g_do_wither(); } /* * Convenience function to destroy a particular provider. */ void g_wither_provider(struct g_provider *pp, int error) { pp->flags |= G_PF_WITHER; if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } /* * This function is called (repeatedly) until the has withered away. */ void g_wither_geom_close(struct g_geom *gp, int error) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name); LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_wither_geom(gp, error); } /* * This function is called (repeatedly) until we cant wash away more * withered bits at present. 
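/*
 * A class-initiated teardown, as a sketch: live geoms are not destroyed
 * directly; g_wither_geom() orphans the providers and g_wither_washer()
 * (below) then dismantles consumers, providers and finally the geom once
 * all access counts have drained. Hypothetical helper, topology lock held:
 */
static void
example_teardown(struct g_geom *gp)
{

	g_topology_assert();
	g_wither_geom(gp, ENXIO);	/* the washer finishes the job */
}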
*/ void g_wither_washer() { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp, *pp2; struct g_consumer *cp, *cp2; g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (!(pp->flags & G_PF_WITHER)) continue; if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } if (!(gp->flags & G_GEOM_WITHER)) continue; LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) { if (cp->acr || cp->acw || cp->ace) continue; if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); } if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); } } } struct g_consumer * g_new_consumer(struct g_geom *gp) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(!(gp->flags & G_GEOM_WITHER), ("g_new_consumer on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); KASSERT(gp->orphan != NULL, ("g_new_consumer on geom(%s) (class %s) without orphan", gp->name, gp->class->name)); cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); cp->geom = gp; cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->consumer, cp, consumer); return(cp); } void g_destroy_consumer(struct g_consumer *cp) { struct g_geom *gp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp); KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached")); KASSERT (cp->acr == 0, ("g_destroy_consumer with acr")); KASSERT (cp->acw == 0, ("g_destroy_consumer with acw")); KASSERT (cp->ace == 0, ("g_destroy_consumer with ace")); g_cancel_event(cp); gp = cp->geom; LIST_REMOVE(cp, consumer); devstat_remove_entry(cp->stat); g_free(cp); if (gp->flags & G_GEOM_WITHER) g_do_wither(); } static void g_new_provider_event(void *arg, int flag) { struct g_class *mp; struct g_provider *pp; struct g_consumer *cp, *next_cp; g_topology_assert(); if (flag == EV_CANCEL) return; if (g_shutdown) return; pp = arg; G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_new_provider_event but withered")); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, next_cp) { if ((cp->flags & G_CF_ORPHAN) == 0 && cp->geom->attrchanged != NULL) cp->geom->attrchanged(cp, "GEOM::media"); } if (g_notaste) return; LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) 
{ struct g_provider *pp; struct sbuf *sb; va_list ap; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(gp->access != NULL, ("new provider on geom(%s) without ->access (class %s)", gp->name, gp->class->name)); KASSERT(gp->start != NULL, ("new provider on geom(%s) without ->start (class %s)", gp->name, gp->class->name)); KASSERT(!(gp->flags & G_GEOM_WITHER), ("new provider on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); pp->name = (char *)(pp + 1); strcpy(pp->name, sbuf_data(sb)); sbuf_delete(sb); LIST_INIT(&pp->consumers); pp->error = ENXIO; pp->geom = gp; pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->provider, pp, provider); g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL); return (pp); } void g_error_provider(struct g_provider *pp, int error) { /* G_VALID_PROVIDER(pp); We may not have g_topology */ pp->error = error; } static void g_resize_provider_event(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp, *cp2; off_t size; g_topology_assert(); if (g_shutdown) return; hh = arg; pp = hh->pp; size = hh->size; g_free(hh); G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_resize_provider_event but withered")); g_trace(G_T_TOPOLOGY, "g_resize_provider_event(%p)", pp); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if (gp->resize == NULL && size < pp->mediasize) { /* * XXX: g_dev_orphan method does deferred destroying * and it is possible, that other event could already * call the orphan method. Check consumer's flags to * do not schedule it twice. */ if (cp->flags & G_CF_ORPHAN) continue; cp->flags |= G_CF_ORPHAN; cp->geom->orphan(cp); } } pp->mediasize = size; LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if ((gp->flags & G_GEOM_WITHER) == 0 && gp->resize != NULL) gp->resize(cp); } /* * After resizing, the previously invalid GEOM class metadata * might become valid. This means we should retaste. 
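/*
 * The producer side of the resize event above, as a sketch: a disk driver
 * that detects a capacity change simply reports the new size and lets the
 * event handler orphan non-resizable consumers and retaste. Hypothetical
 * driver name:
 */
static void
exdisk_capacity_changed(struct g_provider *pp, off_t newsize)
{

	/* g_resize_provider() ignores withered and unchanged providers. */
	g_resize_provider(pp, newsize);
}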
*/ LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } void g_resize_provider(struct g_provider *pp, off_t size) { struct g_hh00 *hh; G_VALID_PROVIDER(pp); if (pp->flags & G_PF_WITHER) return; if (size == pp->mediasize) return; hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->pp = pp; hh->size = size; g_post_event(g_resize_provider_event, hh, M_WAITOK, NULL); } #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif struct g_provider * g_provider_by_name(char const *arg) { struct g_class *cp; struct g_geom *gp; struct g_provider *pp, *wpp; if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) arg += sizeof(_PATH_DEV) - 1; wpp = NULL; LIST_FOREACH(cp, &g_classes, class) { LIST_FOREACH(gp, &cp->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (strcmp(arg, pp->name) != 0) continue; if ((gp->flags & G_GEOM_WITHER) == 0 && (pp->flags & G_PF_WITHER) == 0) return (pp); else wpp = pp; } } } return (wpp); } void g_destroy_provider(struct g_provider *pp) { struct g_geom *gp; g_topology_assert(); G_VALID_PROVIDER(pp); KASSERT(LIST_EMPTY(&pp->consumers), ("g_destroy_provider but attached")); KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); KASSERT (pp->ace == 0, ("g_destroy_provider with ace")); g_cancel_event(pp); LIST_REMOVE(pp, provider); gp = pp->geom; devstat_remove_entry(pp->stat); /* * If a callback was provided, send notification that the provider * is now gone. */ if (gp->providergone != NULL) gp->providergone(pp); g_free(pp); if ((gp->flags & G_GEOM_WITHER)) g_do_wither(); } /* * We keep the "geoms" list sorted by topological order (== increasing * numerical rank) at all times. * When an attach is done, the attaching geoms rank is invalidated * and it is moved to the tail of the list. * All geoms later in the sequence has their ranks reevaluated in * sequence. If we cannot assign rank to a geom because it's * prerequisites do not have rank, we move that element to the tail * of the sequence with invalid rank as well. * At some point we encounter our original geom and if we stil fail * to assign it a rank, there must be a loop and we fail back to * g_attach() which detach again and calls redo_rank again * to fix up the damage. * It would be much simpler code wise to do it recursively, but we * can't risk that on the kernel stack. 
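/*
 * The rank invariant maintained by redo_rank() below, reduced to one
 * self-contained function: a geom attached to providers of ranks r1..rn
 * gets rank max(r1..rn) + 1, and 0 means "not yet computable".
 * "example_rank" is a hypothetical illustration, not kernel code.
 */
static int
example_rank(const int *provider_ranks, int n)
{
	int i, m;

	m = 1;				/* no attachments: rank 1 */
	for (i = 0; i < n; i++) {
		if (provider_ranks[i] == 0)
			return (0);	/* prerequisite unranked: defer */
		if (provider_ranks[i] >= m)
			m = provider_ranks[i] + 1;
	}
	return (m);
}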
*/ static int redo_rank(struct g_geom *gp) { struct g_consumer *cp; struct g_geom *gp1, *gp2; int n, m; g_topology_assert(); G_VALID_GEOM(gp); /* Invalidate this geoms rank and move it to the tail */ gp1 = TAILQ_NEXT(gp, geoms); if (gp1 != NULL) { gp->rank = 0; TAILQ_REMOVE(&geoms, gp, geoms); TAILQ_INSERT_TAIL(&geoms, gp, geoms); } else { gp1 = gp; } /* re-rank the rest of the sequence */ for (; gp1 != NULL; gp1 = gp2) { gp1->rank = 0; m = 1; LIST_FOREACH(cp, &gp1->consumer, consumer) { if (cp->provider == NULL) continue; n = cp->provider->geom->rank; if (n == 0) { m = 0; break; } else if (n >= m) m = n + 1; } gp1->rank = m; gp2 = TAILQ_NEXT(gp1, geoms); /* got a rank, moving on */ if (m != 0) continue; /* no rank to original geom means loop */ if (gp == gp1) return (ELOOP); /* no rank, put it at the end move on */ TAILQ_REMOVE(&geoms, gp1, geoms); TAILQ_INSERT_TAIL(&geoms, gp1, geoms); } return (0); } int g_attach(struct g_consumer *cp, struct g_provider *pp) { int error; g_topology_assert(); G_VALID_CONSUMER(cp); G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "g_attach(%p, %p)", cp, pp); KASSERT(cp->provider == NULL, ("attach but attached")); cp->provider = pp; cp->flags &= ~G_CF_ORPHAN; LIST_INSERT_HEAD(&pp->consumers, cp, consumers); error = redo_rank(cp->geom); if (error) { LIST_REMOVE(cp, consumers); cp->provider = NULL; redo_rank(cp->geom); } return (error); } void g_detach(struct g_consumer *cp) { struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp); KASSERT(cp->provider != NULL, ("detach but not attached")); KASSERT(cp->acr == 0, ("detach but nonzero acr")); KASSERT(cp->acw == 0, ("detach but nonzero acw")); KASSERT(cp->ace == 0, ("detach but nonzero ace")); KASSERT(cp->nstart == cp->nend, ("detach with active requests")); pp = cp->provider; LIST_REMOVE(cp, consumers); cp->provider = NULL; if ((cp->geom->flags & G_GEOM_WITHER) || (pp->geom->flags & G_GEOM_WITHER) || (pp->flags & G_PF_WITHER)) g_do_wither(); redo_rank(cp->geom); } /* * g_access() * * Access-check with delta values. The question asked is "can provider * "cp" change the access counters by the relative amounts dc[rwe] ?" */ int g_access(struct g_consumer *cp, int dcr, int dcw, int dce) { struct g_provider *pp; struct g_geom *gp; int pw, pe; #ifdef INVARIANTS int sr, sw, se; #endif int error; g_topology_assert(); G_VALID_CONSUMER(cp); pp = cp->provider; KASSERT(pp != NULL, ("access but not attached")); G_VALID_PROVIDER(pp); gp = pp->geom; g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)", cp, pp->name, dcr, dcw, dce); KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request")); KASSERT(cp->acr + dcr != 0 || cp->acw + dcw != 0 || cp->ace + dce != 0 || cp->nstart == cp->nend, ("Last close with active requests")); KASSERT(gp->access != NULL, ("NULL geom->access")); /* * If our class cares about being spoiled, and we have been, we * are probably just ahead of the event telling us that. Fail * now rather than having to unravel this later. */ if (cp->geom->spoiled != NULL && (cp->flags & G_CF_SPOILED) && (dcr > 0 || dcw > 0 || dce > 0)) return (ENXIO); /* * A number of GEOM classes either need to perform an I/O on the first * open or to acquire a different subsystem's lock. To do that they * may have to drop the topology lock. 
* Other GEOM classes perform special actions when opening a lower rank * geom for the first time. As a result, more than one thread may * end up performing the special actions. * So, we prevent concurrent "first" opens by marking the consumer with * special flag. * * Note that if the geom's access method never drops the topology lock, * then we will never see G_GEOM_IN_ACCESS here. */ while ((gp->flags & G_GEOM_IN_ACCESS) != 0) { g_trace(G_T_ACCESS, "%s: race on geom %s via provider %s and consumer of %s", __func__, gp->name, pp->name, cp->geom->name); gp->flags |= G_GEOM_ACCESS_WAIT; g_topology_sleep(gp, 0); } /* * Figure out what counts the provider would have had, if this * consumer had (r0w0e0) at this time. */ pw = pp->acw - cp->acw; pe = pp->ace - cp->ace; g_trace(G_T_ACCESS, "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", dcr, dcw, dce, cp->acr, cp->acw, cp->ace, pp->acr, pp->acw, pp->ace, pp, pp->name); /* If foot-shooting is enabled, any open on rank#1 is OK */ if ((g_debugflags & G_F_FOOTSHOOTING) && gp->rank == 1) ; /* If we try exclusive but already write: fail */ else if (dce > 0 && pw > 0) return (EPERM); /* If we try write but already exclusive: fail */ else if (dcw > 0 && pe > 0) return (EPERM); /* If we try to open more but provider is error'ed: fail */ else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) { printf("%s(%d): provider %s has error %d set\n", __func__, __LINE__, pp->name, pp->error); return (pp->error); } /* Ok then... */ #ifdef INVARIANTS sr = cp->acr; sw = cp->acw; se = cp->ace; #endif gp->flags |= G_GEOM_IN_ACCESS; error = gp->access(pp, dcr, dcw, dce); KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0, ("Geom provider %s::%s dcr=%d dcw=%d dce=%d error=%d failed " "closing ->access()", gp->class->name, pp->name, dcr, dcw, dce, error)); g_topology_assert(); gp->flags &= ~G_GEOM_IN_ACCESS; KASSERT(cp->acr == sr && cp->acw == sw && cp->ace == se, ("Access counts changed during geom->access")); if ((gp->flags & G_GEOM_ACCESS_WAIT) != 0) { gp->flags &= ~G_GEOM_ACCESS_WAIT; wakeup(gp); } if (!error) { /* * If we open first write, spoil any partner consumers. * If we close last write and provider is not errored, * trigger re-taste. 
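/*
 * The canonical consumer-side use of g_access(), as a sketch: counts are
 * changed only by relative deltas, and a consumer must eventually release
 * exactly what it acquired. Hypothetical helper, topology lock held on
 * entry:
 */
static int
example_open_rw(struct g_consumer *cp)
{
	int error;

	error = g_access(cp, 1, 1, 1);	/* +read, +write, +exclusive */
	if (error != 0)
		return (error);		/* EPERM, ENXIO, provider error... */
	/* ... use the provider (dropping the topology lock for I/O) ... */
	g_access(cp, -1, -1, -1);	/* release the same deltas */
	return (0);
}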
*/ if (pp->acw == 0 && dcw != 0) g_spoil(pp, cp); else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 && !(gp->flags & G_GEOM_WITHER)) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); pp->acr += dcr; pp->acw += dcw; pp->ace += dce; cp->acr += dcr; cp->acw += dcw; cp->ace += dce; if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) KASSERT(pp->sectorsize > 0, ("Provider %s lacks sectorsize", pp->name)); if ((cp->geom->flags & G_GEOM_WITHER) && cp->acr == 0 && cp->acw == 0 && cp->ace == 0) g_do_wither(); } return (error); } int g_handleattr_int(struct bio *bp, const char *attribute, int val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_str(struct bio *bp, const char *attribute, const char *str) { return (g_handleattr(bp, attribute, str, 0)); } int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len) { int error = 0; if (strcmp(bp->bio_attribute, attribute)) return (0); if (len == 0) { bzero(bp->bio_data, bp->bio_length); if (strlcpy(bp->bio_data, val, bp->bio_length) >= bp->bio_length) { printf("%s: %s %s bio_length %jd strlen %zu -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, strlen(val)); error = EFAULT; } } else if (bp->bio_length == len) { bcopy(val, bp->bio_data, len); } else { printf("%s: %s %s bio_length %jd len %d -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, len); error = EFAULT; } if (error == 0) bp->bio_completed = bp->bio_length; g_io_deliver(bp, error); return (1); } int g_std_access(struct g_provider *pp, int dr __unused, int dw __unused, int de __unused) { g_topology_assert(); G_VALID_PROVIDER(pp); return (0); } void g_std_done(struct bio *bp) { struct bio *bp2; bp2 = bp->bio_parent; if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; g_destroy_bio(bp); bp2->bio_inbed++; - if (bp2->bio_children == bp2->bio_inbed) + if (bp2->bio_children == bp2->bio_inbed) { + if (bp2->bio_cmd == BIO_SPEEDUP) + bp2->bio_completed = bp2->bio_length; g_io_deliver(bp2, bp2->bio_error); + } } /* XXX: maybe this is only g_slice_spoiled */ void g_std_spoiled(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp); cp->flags |= G_CF_ORPHAN; g_detach(cp); gp = cp->geom; LIST_FOREACH(pp, &gp->provider, provider) g_orphan_provider(pp, ENXIO); g_destroy_consumer(cp); if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); else gp->flags |= G_GEOM_WITHER; } /* * Spoiling happens when a provider is opened for writing, but consumers * which are configured by in-band data are attached (slicers for instance). * Since the write might potentially change the in-band data, such consumers * need to re-evaluate their existence after the writing session closes. * We do this by (offering to) tear them down when the open for write happens * in return for a re-taste when it closes again. * Together with the fact that such consumers grab an 'e' bit whenever they * are open, regardless of mode, this ends up DTRT. 
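/*
 * The mechanism above is opt-in. A metadata-configured class provides a
 * ->spoiled method (g_std_spoiled() above is the stock "tear down and be
 * re-tasted" one) and, in its own access method, holds an 'e' bit below
 * whenever any of its providers is open, so writes that could invalidate
 * the metadata are blocked while it is in use. Sketch of the
 * pass-through, in the style of g_slice (hypothetical name):
 */
static int
example_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_consumer *cp;

	cp = LIST_FIRST(&pp->geom->consumer);
	/* Mirror the deltas below, adding 'e' for any open of any kind. */
	return (g_access(cp, dr, dw, de + dr + dw));
}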
*/ static void g_spoil_event(void *arg, int flag) { struct g_provider *pp; struct g_consumer *cp, *cp2; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "%s %p(%s:%s:%s)", __func__, pp, pp->geom->class->name, pp->geom->name, pp->name); for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) { cp2 = LIST_NEXT(cp, consumers); if ((cp->flags & G_CF_SPOILED) == 0) continue; cp->flags &= ~G_CF_SPOILED; if (cp->geom->spoiled == NULL) continue; cp->geom->spoiled(cp); g_topology_assert(); } } void g_spoil(struct g_provider *pp, struct g_consumer *cp) { struct g_consumer *cp2; g_topology_assert(); G_VALID_PROVIDER(pp); G_VALID_CONSUMER(cp); LIST_FOREACH(cp2, &pp->consumers, consumers) { if (cp2 == cp) continue; /* KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); */ KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); cp2->flags |= G_CF_SPOILED; } g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL); } static void g_media_changed_event(void *arg, int flag) { struct g_provider *pp; int retaste; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); /* * If provider was not open for writing, queue retaste after spoiling. * If it was, retaste will happen automatically on close. */ retaste = (pp->acw == 0 && pp->error == 0 && !(pp->geom->flags & G_GEOM_WITHER)); g_spoil_event(arg, flag); if (retaste) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); } int g_media_changed(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_media_changed_event, pp, flag, pp, NULL)); } int g_media_gone(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_spoil_event, pp, flag, pp, NULL)); } int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len) { int error, i; i = len; error = g_io_getattr(attr, cp, &i, var); if (error) return (error); if (i != len) return (EINVAL); return (0); } static int g_get_device_prefix_len(const char *name) { int len; if (strncmp(name, "ada", 3) == 0) len = 3; else if (strncmp(name, "ad", 2) == 0) len = 2; else return (0); if (name[len] < '0' || name[len] > '9') return (0); do { len++; } while (name[len] >= '0' && name[len] <= '9'); return (len); } int g_compare_names(const char *namea, const char *nameb) { int deva, devb; if (strcmp(namea, nameb) == 0) return (1); deva = g_get_device_prefix_len(namea); if (deva == 0) return (0); devb = g_get_device_prefix_len(nameb); if (devb == 0) return (0); if (strcmp(namea + deva, nameb + devb) == 0) return (1); return (0); } void g_geom_add_alias(struct g_geom *gp, const char *alias) { struct g_geom_alias *gap; gap = (struct g_geom_alias *)g_malloc( sizeof(struct g_geom_alias) + strlen(alias) + 1, M_WAITOK); strcpy((char *)(gap + 1), alias); gap->ga_alias = (const char *)(gap + 1); LIST_INSERT_HEAD(&gp->aliases, gap, ga_next); } #if defined(DIAGNOSTIC) || defined(DDB) /* * This function walks the mesh and returns a non-zero integer if it * finds the argument pointer is an object. The return value indicates * which type of object it is believed to be. If topology is not locked, * this function is potentially dangerous, but we don't assert that the * topology lock is held when called from debugger. 
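/*
 * The driver side of the two media events above, as a sketch: insertion
 * is reported with g_media_changed() (spoil, then retaste), removal with
 * g_media_gone() (spoil only). Hypothetical driver name; the flag is
 * passed through to g_post_event().
 */
static void
excd_media_event(struct g_provider *pp, bool inserted)
{

	if (inserted)
		(void)g_media_changed(pp, M_WAITOK);
	else
		(void)g_media_gone(pp, M_WAITOK);
}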
*/ int g_valid_obj(void const *ptr) { struct g_class *mp; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; #ifdef KDB if (kdb_active == 0) #endif g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (ptr == mp) return (1); LIST_FOREACH(gp, &mp->geom, geom) { if (ptr == gp) return (2); LIST_FOREACH(cp, &gp->consumer, consumer) if (ptr == cp) return (3); LIST_FOREACH(pp, &gp->provider, provider) if (ptr == pp) return (4); } } return(0); } #endif #ifdef DDB #define gprintf(...) do { \ db_printf("%*s", indent, ""); \ db_printf(__VA_ARGS__); \ } while (0) #define gprintln(...) do { \ gprintf(__VA_ARGS__); \ db_printf("\n"); \ } while (0) #define ADDFLAG(obj, flag, sflag) do { \ if ((obj)->flags & (flag)) { \ if (comma) \ strlcat(str, ",", size); \ strlcat(str, (sflag), size); \ comma = 1; \ } \ } while (0) static char * provider_flags_to_string(struct g_provider *pp, char *str, size_t size) { int comma = 0; bzero(str, size); if (pp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER"); ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN"); return (str); } static char * geom_flags_to_string(struct g_geom *gp, char *str, size_t size) { int comma = 0; bzero(str, size); if (gp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER"); return (str); } static void db_show_geom_consumer(int indent, struct g_consumer *cp) { if (indent == 0) { gprintln("consumer: %p", cp); gprintln(" class: %s (%p)", cp->geom->class->name, cp->geom->class); gprintln(" geom: %s (%p)", cp->geom->name, cp->geom); if (cp->provider == NULL) gprintln(" provider: none"); else { gprintln(" provider: %s (%p)", cp->provider->name, cp->provider); } gprintln(" access: r%dw%de%d", cp->acr, cp->acw, cp->ace); gprintln(" flags: 0x%04x", cp->flags); #ifdef INVARIANTS gprintln(" nstart: %u", cp->nstart); gprintln(" nend: %u", cp->nend); #endif } else { gprintf("consumer: %p (%s), access=r%dw%de%d", cp, cp->provider != NULL ? 
cp->provider->name : "none", cp->acr, cp->acw, cp->ace); if (cp->flags) db_printf(", flags=0x%04x", cp->flags); db_printf("\n"); } } static void db_show_geom_provider(int indent, struct g_provider *pp) { struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("provider: %s (%p)", pp->name, pp); gprintln(" class: %s (%p)", pp->geom->class->name, pp->geom->class); gprintln(" geom: %s (%p)", pp->geom->name, pp->geom); gprintln(" mediasize: %jd", (intmax_t)pp->mediasize); gprintln(" sectorsize: %u", pp->sectorsize); gprintln(" stripesize: %ju", (uintmax_t)pp->stripesize); gprintln(" stripeoffset: %ju", (uintmax_t)pp->stripeoffset); gprintln(" access: r%dw%de%d", pp->acr, pp->acw, pp->ace); gprintln(" flags: %s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); gprintln(" error: %d", pp->error); if (LIST_EMPTY(&pp->consumers)) gprintln(" consumers: none"); } else { gprintf("provider: %s (%p), access=r%dw%de%d", pp->name, pp, pp->acr, pp->acw, pp->ace); if (pp->flags != 0) { db_printf(", flags=%s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&pp->consumers)) { LIST_FOREACH(cp, &pp->consumers, consumers) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_geom(int indent, struct g_geom *gp) { struct g_provider *pp; struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("geom: %s (%p)", gp->name, gp); gprintln(" class: %s (%p)", gp->class->name, gp->class); gprintln(" flags: %s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); gprintln(" rank: %d", gp->rank); if (LIST_EMPTY(&gp->provider)) gprintln(" providers: none"); if (LIST_EMPTY(&gp->consumer)) gprintln(" consumers: none"); } else { gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank); if (gp->flags != 0) { db_printf(", flags=%s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&gp->provider)) { LIST_FOREACH(pp, &gp->provider, provider) { db_show_geom_provider(indent + 2, pp); if (db_pager_quit) break; } } if (!LIST_EMPTY(&gp->consumer)) { LIST_FOREACH(cp, &gp->consumer, consumer) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_class(struct g_class *mp) { struct g_geom *gp; db_printf("class: %s (%p)\n", mp->name, mp); LIST_FOREACH(gp, &mp->geom, geom) { db_show_geom_geom(2, gp); if (db_pager_quit) break; } } /* * Print the GEOM topology or the given object. */ DB_SHOW_COMMAND(geom, db_show_geom) { struct g_class *mp; if (!have_addr) { /* No address given, print the entire topology. 
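 * From ddb(4) this is plain "show geom"; "show geom <addr>" instead
 * prints just the object the address resolves to, as classified by
 * g_valid_obj() above.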
*/ LIST_FOREACH(mp, &g_classes, class) { db_show_geom_class(mp); db_printf("\n"); if (db_pager_quit) break; } } else { switch (g_valid_obj((void *)addr)) { case 1: db_show_geom_class((struct g_class *)addr); break; case 2: db_show_geom_geom(0, (struct g_geom *)addr); break; case 3: db_show_geom_consumer(0, (struct g_consumer *)addr); break; case 4: db_show_geom_provider(0, (struct g_provider *)addr); break; default: db_printf("Not a GEOM object.\n"); break; } } } static void db_print_bio_cmd(struct bio *bp) { db_printf(" cmd: "); switch (bp->bio_cmd) { case BIO_READ: db_printf("BIO_READ"); break; case BIO_WRITE: db_printf("BIO_WRITE"); break; case BIO_DELETE: db_printf("BIO_DELETE"); break; case BIO_GETATTR: db_printf("BIO_GETATTR"); break; case BIO_FLUSH: db_printf("BIO_FLUSH"); break; case BIO_CMD0: db_printf("BIO_CMD0"); break; case BIO_CMD1: db_printf("BIO_CMD1"); break; case BIO_CMD2: db_printf("BIO_CMD2"); break; case BIO_ZONE: db_printf("BIO_ZONE"); break; default: db_printf("UNKNOWN"); break; } db_printf("\n"); } static void db_print_bio_flags(struct bio *bp) { int comma; comma = 0; db_printf(" flags: "); if (bp->bio_flags & BIO_ERROR) { db_printf("BIO_ERROR"); comma = 1; } if (bp->bio_flags & BIO_DONE) { db_printf("%sBIO_DONE", (comma ? ", " : "")); comma = 1; } if (bp->bio_flags & BIO_ONQUEUE) db_printf("%sBIO_ONQUEUE", (comma ? ", " : "")); db_printf("\n"); } /* * Print useful information in a BIO */ DB_SHOW_COMMAND(bio, db_show_bio) { struct bio *bp; if (have_addr) { bp = (struct bio *)addr; db_printf("BIO %p\n", bp); db_print_bio_cmd(bp); db_print_bio_flags(bp); db_printf(" cflags: 0x%hx\n", bp->bio_cflags); db_printf(" pflags: 0x%hx\n", bp->bio_pflags); db_printf(" offset: %jd\n", (intmax_t)bp->bio_offset); db_printf(" length: %jd\n", (intmax_t)bp->bio_length); db_printf(" bcount: %ld\n", bp->bio_bcount); db_printf(" resid: %ld\n", bp->bio_resid); db_printf(" completed: %jd\n", (intmax_t)bp->bio_completed); db_printf(" children: %u\n", bp->bio_children); db_printf(" inbed: %u\n", bp->bio_inbed); db_printf(" error: %d\n", bp->bio_error); db_printf(" parent: %p\n", bp->bio_parent); db_printf(" driver1: %p\n", bp->bio_driver1); db_printf(" driver2: %p\n", bp->bio_driver2); db_printf(" caller1: %p\n", bp->bio_caller1); db_printf(" caller2: %p\n", bp->bio_caller2); db_printf(" bio_from: %p\n", bp->bio_from); db_printf(" bio_to: %p\n", bp->bio_to); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) db_printf(" bio_track_bp: %p\n", bp->bio_track_bp); #endif } } #undef gprintf #undef gprintln #undef ADDFLAG #endif /* DDB */ Index: projects/clang1000-import/sys/geom/stripe/g_stripe.c =================================================================== --- projects/clang1000-import/sys/geom/stripe/g_stripe.c (revision 357178) +++ projects/clang1000-import/sys/geom/stripe/g_stripe.c (revision 357179) @@ -1,1276 +1,1278 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_stripe, "GEOM striping support"); static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); static uma_zone_t g_stripe_zone; static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_stripe_taste; static g_ctl_req_t g_stripe_config; static g_dumpconf_t g_stripe_dumpconf; static g_init_t g_stripe_init; static g_fini_t g_stripe_fini; struct g_class g_stripe_class = { .name = G_STRIPE_CLASS_NAME, .version = G_VERSION, .ctlreq = g_stripe_config, .taste = g_stripe_taste, .destroy_geom = g_stripe_destroy_geom, .init = g_stripe_init, .fini = g_stripe_fini }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); static u_int g_stripe_debug = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, "Debug level"); static int g_stripe_fast = 0; static int g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) { int error, fast; fast = g_stripe_fast; error = sysctl_handle_int(oidp, &fast, 0, req); if (error == 0 && req->newptr != NULL) g_stripe_fast = fast; return (error); } SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN, NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); static u_int g_stripe_maxmem = MAXPHYS * 100; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); static u_int g_stripe_fast_failed = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); /* * Greatest Common Divisor. */ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } static void g_stripe_init(struct g_class *mp __unused) { g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, NULL, NULL, 0, 0); g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); } static void g_stripe_fini(struct g_class *mp __unused) { uma_zdestroy(g_stripe_zone); } /* * Return the number of valid disks. 
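 * g_stripe_check_and_run() below compares this against sc_ndisks and
 * only brings the provider up once every component is attached.
 */
/*
 * Worked example for the gcd()/lcm() helpers above: gcd(512, 4096) =
 * 512, so lcm(512, 4096) = 512 * 4096 / 512 = 4096.  This is how
 * g_stripe_check_and_run() merges component sector sizes: a stripe
 * over a 512-byte-sector disk and a 4096-byte-sector disk exports
 * 4096-byte sectors.
 */
/*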
*/ static u_int g_stripe_nvalid(struct g_stripe_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i] != NULL) no++; } return (no); } static void g_stripe_remove_disk(struct g_consumer *cp) { struct g_stripe_softc *sc; g_topology_assert(); KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); sc = (struct g_stripe_softc *)cp->geom->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); if (cp->private == NULL) { G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); cp->private = (void *)(uintptr_t)-1; } if (sc->sc_provider != NULL) { G_STRIPE_DEBUG(0, "Device %s deactivated.", sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) return; sc->sc_disks[cp->index] = NULL; cp->index = 0; g_detach(cp); g_destroy_consumer(cp); /* If there are no valid disks anymore, remove device. */ if (LIST_EMPTY(&sc->sc_geom->consumer)) g_stripe_destroy(sc, 1); } static void g_stripe_orphan(struct g_consumer *cp) { struct g_stripe_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; g_stripe_remove_disk(cp); } static int g_stripe_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2, *tmp; struct g_stripe_softc *sc; struct g_geom *gp; int error; g_topology_assert(); gp = pp->geom; sc = gp->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { error = g_access(cp1, dr, dw, de); if (error != 0) goto fail; if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && cp1->private != NULL) { g_stripe_remove_disk(cp1); /* May destroy geom. 
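 * Destroying the geom recurses into g_stripe_destroy(),
 * which is why the walk above uses LIST_FOREACH_SAFE.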
*/ } } return (0); fail: LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } return (error); } static void g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, off_t length, int mode) { off_t stripesize; size_t len; stripesize = sc->sc_stripesize; len = (size_t)(stripesize - (offset & (stripesize - 1))); do { bcopy(src, dst, len); if (mode) { dst += len + stripesize * (sc->sc_ndisks - 1); src += len; } else { dst += len; src += len + stripesize * (sc->sc_ndisks - 1); } length -= len; KASSERT(length >= 0, ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); if (length > stripesize) len = stripesize; else len = length; } while (length > 0); } static void g_stripe_done(struct bio *bp) { struct g_stripe_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, bp->bio_length, 1); bp->bio_data = bp->bio_caller1; bp->bio_caller1 = NULL; } mtx_lock(&sc->sc_lock); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { mtx_unlock(&sc->sc_lock); if (pbp->bio_driver1 != NULL) uma_zfree(g_stripe_zone, pbp->bio_driver1); + if (bp->bio_cmd == BIO_SPEEDUP) + pbp->bio_completed = pbp->bio_length; g_io_deliver(pbp, pbp->bio_error); } else mtx_unlock(&sc->sc_lock); g_destroy_bio(bp); } static int g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; char *addr, *data = NULL; struct bio *cbp; off_t stripesize; u_int nparts = 0; int error; sc = bp->bio_to->geom->softc; addr = bp->bio_data; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; cbp->bio_length = length; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } if (nparts >= sc->sc_ndisks) { cbp = TAILQ_NEXT(cbp, bio_queue); if (cbp == NULL) cbp = TAILQ_FIRST(&queue); nparts++; /* * Update bio structure. */ /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length += MIN(stripesize, length); if (cbp->bio_caller1 == NULL) { cbp->bio_caller1 = cbp->bio_data; cbp->bio_data = NULL; if (data == NULL) { data = uma_zalloc(g_stripe_zone, M_NOWAIT); if (data == NULL) { error = ENOMEM; goto failure; } } } } else { cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); cbp->bio_caller2 = sc->sc_disks[no]; } } if (data != NULL) bp->bio_driver1 = data; /* * Fire off all allocated requests! 
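 * Clones flagged with a bio_caller1 pointer carve their data out of
 * the single bounce buffer allocated from g_stripe_zone; for writes,
 * g_stripe_copy() gathers the scattered stripes into it below.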
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; if (cbp->bio_caller1 != NULL) { cbp->bio_data = data; if (bp->bio_cmd == BIO_WRITE) { g_stripe_copy(sc, cbp->bio_caller1, data, cbp->bio_offset, cbp->bio_length, 0); } data += cbp->bio_length; } G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: if (data != NULL) uma_zfree(g_stripe_zone, data); while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); if (cbp->bio_caller1 != NULL) { cbp->bio_data = cbp->bio_caller1; cbp->bio_caller1 = NULL; } bp->bio_children--; g_destroy_bio(cbp); } return (error); } static int g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; off_t stripesize; struct bio *cbp; char *addr; int error; sc = bp->bio_to->geom->softc; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ if (bp->bio_length == length) cbp->bio_done = g_std_done; /* Optimized lockless case. */ else cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; addr = NULL; } else addr = bp->bio_data; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); if (bp->bio_cmd != BIO_DELETE) addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller2 = sc->sc_disks[no]; if (bp->bio_cmd != BIO_DELETE) addr += stripesize; } /* * Fire off all allocated requests! 
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); bp->bio_children--; g_destroy_bio(cbp); } return (error); } static void g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; u_int no; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_stripe_done; cbp->bio_caller2 = sc->sc_disks[no]; cbp->bio_to = sc->sc_disks[no]->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_STRIPE_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; g_io_request(cbp, cp); } } static void g_stripe_start(struct bio *bp) { off_t offset, start, length, nstripe, stripesize; struct g_stripe_softc *sc; u_int no; int error, fast = 0; sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_stripe_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_STRIPE_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_SPEEDUP: case BIO_FLUSH: g_stripe_pushdown(sc, bp); return; case BIO_GETATTR: /* To which provider should it be delivered? */ default: g_io_deliver(bp, EOPNOTSUPP); return; } stripesize = sc->sc_stripesize; /* * Calculations are quite messy, but fast I hope. */ /* Stripe number. */ /* nstripe = bp->bio_offset / stripesize; */ nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; /* Disk number. */ no = nstripe % sc->sc_ndisks; /* Start position in stripe. */ /* start = bp->bio_offset % stripesize; */ start = bp->bio_offset & (stripesize - 1); /* Start position in disk. */ /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; /* Length of data to operate on. */ length = MIN(bp->bio_length, stripesize - start); /* * Use "fast" mode when: * 1. "Fast" mode is ON. * and * 2. Request size is less than or equal to MAXPHYS, * which should always be true. * and * 3. Request size is at least stripesize * ndisks. If it isn't, * there will be no need to send more than one I/O request to * a provider, so there is nothing to optimize. * and * 4. Request is not unmapped. * and * 5. It is not a BIO_DELETE. */ if (g_stripe_fast && bp->bio_length <= MAXPHYS && bp->bio_length >= stripesize * sc->sc_ndisks && (bp->bio_flags & BIO_UNMAPPED) == 0 && bp->bio_cmd != BIO_DELETE) { fast = 1; } error = 0; if (fast) { error = g_stripe_start_fast(bp, no, offset, length); if (error != 0) g_stripe_fast_failed++; } /* * Use "economic" mode when: * 1. "Economic" mode is ON. * or * 2. "Fast" mode failed. It can only fail if there is no memory.
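 */
/*
 * A worked example of the mapping above, with 3 disks and a 64 kB
 * stripe (sc_stripesize = 65536, sc_stripebits = 16): a request at
 * bio_offset = 200704 gives nstripe = 200704 >> 16 = 3, so it lands
 * on disk no = 3 % 3 = 0 at start = 200704 & 65535 = 4096 into the
 * stripe, the on-disk offset is ((3 / 3) << 16) + 4096 = 69632, and
 * the first chunk covers MIN(bio_length, 65536 - 4096) bytes.
 */
/*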
*/ if (!fast || error != 0) error = g_stripe_start_economic(bp, no, offset, length); if (error != 0) { if (bp->bio_error == 0) bp->bio_error = error; g_io_deliver(bp, bp->bio_error); } } static void g_stripe_check_and_run(struct g_stripe_softc *sc) { struct g_provider *dp; off_t mediasize, ms; u_int no, sectorsize = 0; g_topology_assert(); if (g_stripe_nvalid(sc) != sc->sc_ndisks) return; sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", sc->sc_name); sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (g_stripe_fast == 0) sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; /* * Find the smallest disk. */ mediasize = sc->sc_disks[0]->provider->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) mediasize -= sc->sc_disks[0]->provider->sectorsize; mediasize -= mediasize % sc->sc_stripesize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { dp = sc->sc_disks[no]->provider; ms = dp->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) ms -= dp->sectorsize; ms -= ms % sc->sc_stripesize; if (ms < mediasize) mediasize = ms; sectorsize = lcm(sectorsize, dp->sectorsize); /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_STRIPE_DEBUG(1, "Cancelling unmapped " "because of %s.", dp->name); sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; } } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; sc->sc_provider->stripesize = sc->sc_stripesize; sc->sc_provider->stripeoffset = 0; g_error_provider(sc->sc_provider, 0); G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); } static int g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ stripe_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) { struct g_consumer *cp, *fcp; struct g_geom *gp; int error; g_topology_assert(); /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); /* Check if disk is not already attached. */ if (sc->sc_disks[no] != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = no; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { struct g_stripe_metadata md; /* Reread metadata. 
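 * (to make sure the disk still carries the label we tasted earlier;
 * a mismatch in magic, name or id below aborts the attach)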
*/ error = g_stripe_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } } sc->sc_disks[no] = cp; G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_stripe_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, u_int type) { struct g_stripe_softc *sc; struct g_geom *gp; u_int no; g_topology_assert(); G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* Two disks is the minimum. */ if (md->md_all < 2) { G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); return (NULL); } #if 0 /* Stripe size has to be greater than or equal to the sector size. */ if (md->md_stripesize < sectorsize) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } #endif /* Stripe size has to be a power of 2. */ if (!powerof2(md->md_stripesize)) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } /* Check for duplicate unit. */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_STRIPE_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); gp->start = g_stripe_start; gp->spoiled = g_stripe_orphan; gp->orphan = g_stripe_orphan; gp->access = g_stripe_access; gp->dumpconf = g_stripe_dumpconf; sc->sc_id = md->md_id; sc->sc_stripesize = md->md_stripesize; sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, M_STRIPE, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; sc->sc_type = type; mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) { struct g_provider *pp; struct g_consumer *cp, *cp1; struct g_geom *gp; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_STRIPE_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_STRIPE_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } gp = sc->sc_geom; LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { g_stripe_remove_disk(cp); if (cp1 == NULL) return (0); /* Recursion happened. */ } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? 
(device=%s)", gp->name)); free(sc->sc_disks, M_STRIPE); mtx_destroy(&sc->sc_lock); free(sc, M_STRIPE); G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_stripe_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_stripe_softc *sc; sc = gp->softc; return (g_stripe_destroy(sc, 0)); } static struct g_geom * g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_stripe_metadata md; struct g_stripe_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "stripe:taste"); gp->start = g_stripe_start; gp->access = g_stripe_access; gp->orphan = g_stripe_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_stripe_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) return (NULL); if (md.md_version > G_STRIPE_VERSION) { printf("geom_stripe.ko module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provider field in earlier versions of metadata. */ if (md.md_version < 2) bzero(md.md_provider, sizeof(md.md_provider)); /* There was no md_provsize field in earlier versions of metadata. */ if (md.md_version < 3) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. 
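 * A geom with the same name and id means this provider is simply
 * another member of an already-tasted stripe, so it is added to that
 * device; otherwise a new automatic device is created.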
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); if (gp == NULL) { G_STRIPE_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_stripe_destroy(sc, 1); return (NULL); } } return (gp); } static void g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) { u_int attached, no; struct g_stripe_metadata md; struct g_provider *pp; struct g_stripe_softc *sc; struct g_geom *gp; struct sbuf *sb; off_t *stripesize; const char *name; char param[16]; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); md.md_version = G_STRIPE_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_id = arc4random(); md.md_no = 0; md.md_all = *nargs - 1; stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument.", "stripesize"); return; } md.md_stripesize = (uint32_t)*stripesize; bzero(md.md_provider, sizeof(md.md_provider)); /* This field is not important here. 
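 */
/*
 * For orientation: this is the handler behind a manual create, e.g.
 * (assuming the stock gstripe(8) syntax)
 *
 *	gstripe create -s 65536 data da0 da1
 *
 * which arrives here with nargs = 3, arg0 = "data", stripesize =
 * 65536 and arg1/arg2 naming the component providers.
 */
/*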
*/ md.md_provsize = 0; /* Check all providers are valid */ for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); return; } } gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't configure %s.", md.md_name); return; } sc = gp->softc; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); continue; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); KASSERT(pp != NULL, ("Provider %s disappear?!", name)); if (g_stripe_add_disk(sc, pp, no - 1) != 0) { G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); if (md.md_all != attached) { g_stripe_destroy(gp->softc, 1); gctl_error(req, "%s", sbuf_data(sb)); } sbuf_delete(sb); } static struct g_stripe_softc * g_stripe_find_device(struct g_class *mp, const char *name) { struct g_stripe_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_stripe_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_stripe_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_stripe_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_STRIPE_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_stripe_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_stripe_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_stripe_softc *sc; sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { sbuf_printf(sb, "%s%u\n", indent, (u_int)cp->index); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_stripesize); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_STRIPE_TYPE_AUTOMATIC: sbuf_cat(sb, "AUTOMATIC"); break; case G_STRIPE_TYPE_MANUAL: sbuf_cat(sb, "MANUAL"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_stripe_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_cat(sb, "UP"); else sbuf_cat(sb, "DOWN"); sbuf_cat(sb, "\n"); } } DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); MODULE_VERSION(geom_stripe, 0); Index: projects/clang1000-import/sys/kern/vfs_default.c =================================================================== --- projects/clang1000-import/sys/kern/vfs_default.c (revision 357178) +++ projects/clang1000-import/sys/kern/vfs_default.c (revision 357179) @@ -1,1473 +1,1460 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed * to Berkeley by John Heidemann of the UCLA Ficus project. * * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int vop_nolookup(struct vop_lookup_args *); static int vop_norename(struct vop_rename_args *); static int vop_nostrategy(struct vop_strategy_args *); static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td); static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td); #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) static int vop_stdis_text(struct vop_is_text_args *ap); static int vop_stdunset_text(struct vop_unset_text_args *ap); static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); static int vop_stdfdatasync(struct vop_fdatasync_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't * implement a particular VOP. * * If there is no specific entry here, we will return EOPNOTSUPP. * * Note that every filesystem has to implement either vop_access * or vop_accessx; failing to do so will result in immediate crash * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), * which calls vop_stdaccess() etc. */ struct vop_vector default_vnodeops = { .vop_default = NULL, .vop_bypass = VOP_EOPNOTSUPP, .vop_access = vop_stdaccess, .vop_accessx = vop_stdaccessx, .vop_advise = vop_stdadvise, .vop_advlock = vop_stdadvlock, .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, .vop_fdatasync = vop_stdfdatasync, .vop_getpages = vop_stdgetpages, .vop_getpages_async = vop_stdgetpages_async, .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, .vop_lock1 = vop_stdlock, .vop_lookup = vop_nolookup, .vop_open = VOP_NULL, .vop_pathconf = VOP_EINVAL, .vop_poll = vop_nopoll, .vop_putpages = vop_stdputpages, .vop_readlink = VOP_EINVAL, .vop_rename = vop_norename, .vop_revoke = VOP_PANIC, .vop_strategy = vop_nostrategy, .vop_unlock = vop_stdunlock, .vop_vptocnp = vop_stdvptocnp, .vop_vptofh = vop_stdvptofh, .vop_unp_bind = vop_stdunp_bind, .vop_unp_connect = vop_stdunp_connect, .vop_unp_detach = vop_stdunp_detach, .vop_is_text = vop_stdis_text, .vop_set_text = vop_stdset_text, .vop_unset_text = vop_stdunset_text, .vop_add_writecount = vop_stdadd_writecount, .vop_copy_file_range = vop_stdcopy_file_range, }; VFS_VOP_VECTOR_REGISTER(default_vnodeops); /* * Series of placeholder functions for various error returns for * VOPs. 
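 * A filesystem opts into all of this by chaining its own vector to
 * the defaults; as a sketch (the example_* names are hypothetical):
 *
 *	struct vop_vector example_vnodeops = {
 *		.vop_default = &default_vnodeops,
 *		.vop_lookup = example_lookup,
 *	};
 *	VFS_VOP_VECTOR_REGISTER(example_vnodeops);
 *
 * Any operation left unset then falls back to the entries above,
 * and ultimately to VOP_EOPNOTSUPP via vop_bypass.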
*/ int vop_eopnotsupp(struct vop_generic_args *ap) { /* printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); */ return (EOPNOTSUPP); } int vop_ebadf(struct vop_generic_args *ap) { return (EBADF); } int vop_enotty(struct vop_generic_args *ap) { return (ENOTTY); } int vop_einval(struct vop_generic_args *ap) { return (EINVAL); } int vop_enoent(struct vop_generic_args *ap) { return (ENOENT); } int vop_null(struct vop_generic_args *ap) { return (0); } /* * Helper function to panic on some bad VOPs in some filesystems. */ int vop_panic(struct vop_generic_args *ap) { panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); } /* * vop_std and vop_no are default functions for use by * filesystems that need the "default reasonable" implementation for a * particular operation. * * The documentation for the operations they implement exists (if it exists) * in the VOP_(9) manpage (all uppercase). */ /* * Default vop for filesystems that do not support name lookup */ static int vop_nolookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * vop_norename: * * Handle unlock and reference counting for arguments of vop_rename * for filesystems that do not implement rename operation. */ static int vop_norename(struct vop_rename_args *ap) { vop_rename_fail(ap); return (EOPNOTSUPP); } /* * vop_nostrategy: * * Strategy routine for VFS devices that have none. * * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy * routine. Typically this is done for a BIO_READ strategy call. * Typically B_INVAL is assumed to already be clear prior to a write * and should not be cleared manually unless you just made the buffer * invalid. BIO_ERROR should be cleared either way. */ static int vop_nostrategy (struct vop_strategy_args *ap) { printf("No strategy for buffer at %p\n", ap->a_bp); vn_printf(ap->a_vp, "vnode "); ap->a_bp->b_ioflags |= BIO_ERROR; ap->a_bp->b_error = EOPNOTSUPP; bufdone(ap->a_bp); return (EOPNOTSUPP); } static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td) { int error, reclen; struct uio uio; struct iovec iov; struct dirent *dp; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); if (*len == 0) { iov.iov_base = dirbuf; iov.iov_len = dirbuflen; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = *off; uio.uio_resid = dirbuflen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; *eofflag = 0; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, NULL, NULL); if (error) return (error); *off = uio.uio_offset; *cpos = dirbuf; *len = (dirbuflen - uio.uio_resid); if (*len == 0) return (ENOENT); } dp = (struct dirent *)(*cpos); reclen = dp->d_reclen; *dpp = dp; /* check for malformed directory.. */ if (reclen < DIRENT_MINSIZE) return (EINVAL); *cpos += reclen; *len -= reclen; return (0); } /* * Check if a named file exists in a given directory vnode. 
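 * Returns 1 when an entry named dirname is found and 0 otherwise;
 * failures while reading the directory are folded into "not found".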
*/ static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) { char *dirbuf, *cpos; int error, eofflag, dirbuflen, len, found; off_t off; struct dirent *dp; struct vattr va; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); found = 0; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error) return (found); dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); off = 0; len = 0; do { error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if (dp->d_type != DT_WHT && dp->d_fileno != 0 && strcmp(dp->d_name, dirname) == 0) { found = 1; goto out; } } while (len > 0 || !eofflag); out: free(dirbuf, M_TEMP); return (found); } int vop_stdaccess(struct vop_access_args *ap) { KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); } int vop_stdaccessx(struct vop_accessx_args *ap) { int error; accmode_t accmode = ap->a_accmode; error = vfs_unixify_accmode(&accmode); if (error != 0) return (error); if (accmode == 0) return (0); return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); } /* * Advisory record locking support */ int vop_stdadvlock(struct vop_advlock_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* * The NFSv4 server must avoid doing a vn_lock() here, since it * can deadlock the nfsd threads, due to a LOR. Fortunately * the NFSv4 server always uses SEEK_SET and this code is * only required for the SEEK_END case. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp); if (error) return (error); } else vattr.va_size = 0; return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* The size argument is only needed for SEEK_END. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp); if (error) return (error); } else vattr.va_size = 0; return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) { struct vnode *vp; vp = ap->a_vp; lf_purgelocks(vp, &vp->v_lockf); return (0); } /* * vop_stdpathconf: * * Standard implementation of POSIX pathconf, to get information about limits * for a filesystem. * Override per filesystem for the case where the filesystem has smaller * limits. */ int vop_stdpathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_ASYNC_IO: *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_ACL_EXTENDED: case _PC_ACL_NFS4: case _PC_CAP_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Standard lock, unlock and islocked functions. */ int vop_stdlock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; struct mtx *ilk; ilk = VI_MTX(vp); return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags, &ilk->lock_object, ap->a_file, ap->a_line)); } /* See above. 
*/ int vop_stdunlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; return (lockmgr_unlock(vp->v_vnlock)); } /* See above. */ int vop_stdislocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { return (lockstatus(ap->a_vp->v_vnlock)); } /* * Variants of the above set. * * Differences are: * - shared locking disablement is not supported * - v_vnlock pointer is not honored */ int vop_lock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; int flags = ap->a_flags; struct mtx *ilk; MPASS(vp->v_vnlock == &vp->v_lock); if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0)) goto other; switch (flags & LK_TYPE_MASK) { case LK_SHARED: return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line)); case LK_EXCLUSIVE: return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line)); } other: ilk = VI_MTX(vp); return (lockmgr_lock_fast_path(&vp->v_lock, flags, &ilk->lock_object, ap->a_file, ap->a_line)); } int vop_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; MPASS(vp->v_vnlock == &vp->v_lock); return (lockmgr_unlock(&vp->v_lock)); } int vop_islocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; MPASS(vp->v_vnlock == &vp->v_lock); return (lockstatus(&vp->v_lock)); } /* * Return true for select/poll. */ int vop_nopoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { return (poll_no_poll(ap->a_events)); } /* * Implement poll for local filesystems that support it. */ int vop_stdpoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { if (ap->a_events & ~POLLSTANDARD) return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } /* * Return our mount point, as we will take charge of the writes. */ int vop_stdgetwritemount(ap) struct vop_getwritemount_args /* { struct vnode *a_vp; struct mount **a_mpp; } */ *ap; { struct mount *mp; struct vnode *vp; /* * Note that having a reference does not prevent forced unmount from * setting ->v_mount to NULL after the lock gets released. This is of * no consequence for typical consumers (most notably vn_start_write) * since in this case the vnode is VIRF_DOOMED. Unmount might have * progressed far enough that its completion is only delayed by the * reference obtained here. The consumer only needs to concern itself * with releasing it. */ vp = ap->a_vp; mp = vp->v_mount; if (mp == NULL) { *(ap->a_mpp) = NULL; return (0); } if (vfs_op_thread_enter(mp)) { if (mp == vp->v_mount) { vfs_mp_count_add_pcpu(mp, ref, 1); vfs_op_thread_exit(mp); } else { vfs_op_thread_exit(mp); mp = NULL; } } else { MNT_ILOCK(mp); if (mp == vp->v_mount) { MNT_REF(mp); MNT_IUNLOCK(mp); } else { MNT_IUNLOCK(mp); mp = NULL; } } *(ap->a_mpp) = mp; return (0); } /* * If the file system doesn't implement VOP_BMAP, then return sensible defaults: * - Return the vnode's bufobj instead of any underlying device's bufobj * - Calculate the physical block number as if there were equal size * consecutive blocks, but * - Report no contiguous runs of blocks. 
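 * For example, with f_iosize = 8192 and DEV_BSIZE = 512, btodb(8192)
 * is 16, so logical block 5 is reported as physical block 80 in the
 * vnode's own bufobj, with zero-length runs on both sides.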
*/ int vop_stdbmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_bop != NULL) *ap->a_bop = &ap->a_vp->v_bufobj; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } int vop_stdfsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; int a_waitfor; struct thread *a_td; } */ *ap; { return (vn_fsync_buf(ap->a_vp, ap->a_waitfor)); } static int vop_stdfdatasync(struct vop_fdatasync_args *ap) { return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td)); } int vop_stdfdatasync_buf(struct vop_fdatasync_args *ap) { return (vn_fsync_buf(ap->a_vp, MNT_WAIT)); } /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ int vop_stdgetpages(ap) struct vop_getpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int *a_rbehind; int *a_rahead; } */ *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); } static int vop_stdgetpages_async(struct vop_getpages_async_args *ap) { int error; error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead); ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } int vop_stdkqfilter(struct vop_kqfilter_args *ap) { return vfs_kqfilter(ap); } /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ int vop_stdputpages(ap) struct vop_putpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_sync; int *a_rtvals; } */ *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } int vop_stdvptofh(struct vop_vptofh_args *ap) { return (EOPNOTSUPP); } int vop_stdvptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct ucred *cred = ap->a_cred; char *buf = ap->a_buf; int *buflen = ap->a_buflen; char *dirbuf, *cpos; int i, error, eofflag, dirbuflen, flags, locked, len, covered; off_t off; ino_t fileno; struct vattr va; struct nameidata nd; struct thread *td; struct dirent *dp; struct vnode *mvp; i = *buflen; error = 0; covered = 0; td = curthread; if (vp->v_type != VDIR) return (ENOENT); error = VOP_GETATTR(vp, &va, cred); if (error) return (error); VREF(vp); locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp); NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, "..", vp, td); flags = FREAD; error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL); if (error) { vn_lock(vp, locked | LK_RETRY); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); mvp = *dvp = nd.ni_vp; if (vp->v_mount != (*dvp)->v_mount && ((*dvp)->v_vflag & VV_ROOT) && ((*dvp)->v_mount->mnt_flag & MNT_UNION)) { *dvp = (*dvp)->v_mount->mnt_vnodecovered; VREF(mvp); VOP_UNLOCK(mvp); vn_close(mvp, FREAD, cred, td); VREF(*dvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); covered = 1; } fileno = va.va_fileid; dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); if ((*dvp)->v_type != VDIR) { error = ENOENT; goto out; } off = 0; len = 0; do { /* call VOP_READDIR of parent */ error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if ((dp->d_type != DT_WHT) && (dp->d_fileno == fileno)) { if (covered) { VOP_UNLOCK(*dvp); vn_lock(mvp, LK_SHARED | LK_RETRY); if (dirent_exists(mvp, dp->d_name, td)) { error 
= ENOENT; VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); goto out; } VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); } i -= dp->d_namlen; if (i < 0) { error = ENOMEM; goto out; } if (dp->d_namlen == 1 && dp->d_name[0] == '.') { error = ENOENT; } else { bcopy(dp->d_name, buf + i, dp->d_namlen); error = 0; } goto out; } } while (len > 0 || !eofflag); error = ENOENT; out: free(dirbuf, M_TEMP); if (!error) { *buflen = i; vref(*dvp); } if (covered) { vput(*dvp); vrele(mvp); } else { VOP_UNLOCK(mvp); vn_close(mvp, FREAD, cred, td); } vn_lock(vp, locked | LK_RETRY); return (error); } int vop_stdallocate(struct vop_allocate_args *ap) { #ifdef __notyet__ struct statfs *sfs; off_t maxfilesize = 0; #endif struct iovec aiov; struct vattr vattr, *vap; struct uio auio; off_t fsize, len, cur, offset; uint8_t *buf; struct thread *td; struct vnode *vp; size_t iosize; int error; buf = NULL; error = 0; td = curthread; vap = &vattr; vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; error = VOP_GETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; fsize = vap->va_size; iosize = vap->va_blocksize; if (iosize == 0) iosize = BLKDEV_IOSIZE; if (iosize > MAXPHYS) iosize = MAXPHYS; buf = malloc(iosize, M_TEMP, M_WAITOK); #ifdef __notyet__ /* * Check if the filesystem sets f_maxfilesize; if not use * VOP_SETATTR to perform the check. */ sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = VFS_STATFS(vp->v_mount, sfs, td); if (error == 0) maxfilesize = sfs->f_maxfilesize; free(sfs, M_STATFS); if (error != 0) goto out; if (maxfilesize) { if (offset > maxfilesize || len > maxfilesize || offset + len > maxfilesize) { error = EFBIG; goto out; } } else #endif if (offset + len > vap->va_size) { /* * Test offset + len against the filesystem's maxfilesize. */ VATTR_NULL(vap); vap->va_size = offset + len; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; } for (;;) { /* * Read and write back anything below the nominal file * size. There's currently no way outside the filesystem * to know whether this area is sparse or not. */ cur = iosize; if ((offset % iosize) != 0) cur -= (offset % iosize); if (cur > len) cur = len; if (offset < fsize) { aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; error = VOP_READ(vp, &auio, 0, td->td_ucred); if (error != 0) break; if (auio.uio_resid > 0) { bzero(buf + cur - auio.uio_resid, auio.uio_resid); } } else { bzero(buf, cur); } aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_td = td; error = VOP_WRITE(vp, &auio, 0, td->td_ucred); if (error != 0) break; len -= cur; offset += cur; if (len == 0) break; if (should_yield()) break; } out: *ap->a_len = len; *ap->a_offset = offset; free(buf, M_TEMP); return (error); } int vop_stdadvise(struct vop_advise_args *ap) { struct vnode *vp; struct bufobj *bo; daddr_t startn, endn; off_t bstart, bend, start, end; int bsize, error; vp = ap->a_vp; switch (ap->a_advice) { case POSIX_FADV_WILLNEED: /* * Do nothing for now. Filesystems should provide a * custom method which starts an asynchronous read of * the requested region. 
*/ error = 0; break; case POSIX_FADV_DONTNEED: error = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (VN_IS_DOOMED(vp)) { VOP_UNLOCK(vp); break; } /* * Round to block boundaries (and later possibly further to * page boundaries). Applications cannot reasonably be aware * of the boundaries, and the rounding must be to expand at * both extremities to cover enough. It still doesn't cover * read-ahead. For partial blocks, this gives unnecessary * discarding of buffers but is efficient enough since the * pages usually remain in VMIO for some time. */ bsize = vp->v_bufobj.bo_bsize; bstart = rounddown(ap->a_start, bsize); bend = roundup(ap->a_end, bsize); /* * Deactivate pages in the specified range from the backing VM * object. Pages that are resident in the buffer cache will * remain wired until their corresponding buffers are released * below. */ if (vp->v_object != NULL) { start = trunc_page(bstart); end = round_page(bend); VM_OBJECT_RLOCK(vp->v_object); vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), OFF_TO_IDX(end)); VM_OBJECT_RUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_RLOCK(bo); startn = bstart / bsize; endn = bend / bsize; error = bnoreuselist(&bo->bo_clean, bo, startn, endn); if (error == 0) error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); BO_RUNLOCK(bo); VOP_UNLOCK(vp); break; default: error = EINVAL; break; } return (error); } int vop_stdunp_bind(struct vop_unp_bind_args *ap) { ap->a_vp->v_unpcb = ap->a_unpcb; return (0); } int vop_stdunp_connect(struct vop_unp_connect_args *ap) { *ap->a_unpcb = ap->a_vp->v_unpcb; return (0); } int vop_stdunp_detach(struct vop_unp_detach_args *ap) { ap->a_vp->v_unpcb = NULL; return (0); } static int vop_stdis_text(struct vop_is_text_args *ap) { return (ap->a_vp->v_writecount < 0); } int vop_stdset_text(struct vop_set_text_args *ap) { struct vnode *vp; struct mount *mp; int error; vp = ap->a_vp; VI_LOCK(vp); if (vp->v_writecount > 0) { error = ETXTBSY; } else { /* * If requested by fs, keep a use reference to the * vnode until the last text reference is released. 
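		 *
		 * Text references are encoded as negative v_writecount
		 * values.  A hypothetical sequence on a MNTK_TEXT_REFS
		 * mount:
		 *
		 *	VOP_SET_TEXT()    0 -> -1  VI_TEXT_REF set, vrefl()
		 *	VOP_SET_TEXT()   -1 -> -2
		 *	VOP_UNSET_TEXT() -2 -> -1
		 *	VOP_UNSET_TEXT() -1 ->  0  VI_TEXT_REF cleared, vunref()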
*/ mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 && vp->v_writecount == 0) { vp->v_iflag |= VI_TEXT_REF; vrefl(vp); } vp->v_writecount--; error = 0; } VI_UNLOCK(vp); return (error); } static int vop_stdunset_text(struct vop_unset_text_args *ap) { struct vnode *vp; int error; bool last; vp = ap->a_vp; last = false; VI_LOCK(vp); if (vp->v_writecount < 0) { if ((vp->v_iflag & VI_TEXT_REF) != 0 && vp->v_writecount == -1) { last = true; vp->v_iflag &= ~VI_TEXT_REF; } vp->v_writecount++; error = 0; } else { error = EINVAL; } VI_UNLOCK(vp); if (last) vunref(vp); return (error); } static int vop_stdadd_writecount(struct vop_add_writecount_args *ap) { struct vnode *vp; struct mount *mp; int error; vp = ap->a_vp; VI_LOCK_FLAGS(vp, MTX_DUPOK); if (vp->v_writecount < 0) { error = ETXTBSY; } else { VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp, ("neg writecount increment %d", ap->a_inc)); if (vp->v_writecount == 0) { mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_NOMSYNC) == 0) vlazy(vp); } vp->v_writecount += ap->a_inc; error = 0; } VI_UNLOCK(vp); return (error); } int vop_stdneed_inactive(struct vop_need_inactive_args *ap) { return (1); } int vop_stdioctl(struct vop_ioctl_args *ap) { struct vnode *vp; struct vattr va; off_t *offp; int error; switch (ap->a_command) { case FIOSEEKDATA: case FIOSEEKHOLE: vp = ap->a_vp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (EBADF); if (vp->v_type == VREG) error = VOP_GETATTR(vp, &va, ap->a_cred); else error = ENOTTY; if (error == 0) { offp = ap->a_data; if (*offp < 0 || *offp >= va.va_size) error = ENXIO; else if (ap->a_command == FIOSEEKHOLE) *offp = va.va_size; } VOP_UNLOCK(vp); break; default: error = ENOTTY; break; } return (error); } /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. */ int vfs_stdroot (mp, flags, vpp) struct mount *mp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdstatfs (mp, sbp) struct mount *mp; struct statfs *sbp; { return (EOPNOTSUPP); } int vfs_stdquotactl (mp, cmds, uid, arg) struct mount *mp; int cmds; uid_t uid; void *arg; { return (EOPNOTSUPP); } int vfs_stdsync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *vp, *mvp; struct thread *td; int error, lockreq, allerror = 0; td = curthread; lockreq = LK_EXCLUSIVE | LK_INTERLOCK; if (waitfor != MNT_WAIT) lockreq |= LK_NOWAIT; /* * Force stale buffer cache information to be flushed. 
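	 *
	 * Note that for waitfor != MNT_WAIT the per-vnode lock request
	 * above includes LK_NOWAIT, so vnodes whose locks are contended
	 * are skipped on this pass instead of being slept on.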
 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

static int
vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
{
	int error;

	error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
	    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
	    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
	return (error);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return(0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

static vop_bypass_t *
bp_by_off(struct vop_vector *vop, struct vop_generic_args *a)
{

	return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset));
}

int
vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
{
	vop_bypass_t *bp;
	int prev_stops, rc;

-	for (; vop != NULL; vop = vop->vop_default) {
-		bp = bp_by_off(vop, a);
-		if (bp != NULL)
-			break;
-
-		/*
-		 * Bypass is not really supported.  It is done for
-		 * fallback to unimplemented vops in the default
-		 * vector.
-		 */
-		bp = vop->vop_bypass;
-		if (bp != NULL)
-			break;
-	}
+	bp = bp_by_off(vop, a);
	MPASS(bp != NULL);

	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
	rc = bp(a);
	sigallowstop(prev_stops);
	return (rc);
}
Index: projects/clang1000-import/sys/kern/vfs_subr.c
===================================================================
--- projects/clang1000-import/sys/kern/vfs_subr.c	(revision 357178)
+++ projects/clang1000-import/sys/kern/vfs_subr.c	(revision 357179)
@@ -1,6397 +1,6401 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void v_init_counters(struct vnode *); static void v_incr_devcount(struct vnode *); static void v_decr_devcount(struct vnode *); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); static void destroy_vpollinfo(struct vpollinfo *vi); static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, daddr_t startlbn, daddr_t endlbn); static void vnlru_recalc(void); /* * These fences are intended for cases where some synchronization is * needed between access of v_iflags and lockless vnode refcount (v_holdcnt * and v_usecount) updates. Access to v_iflags is generally synchronized * by the interlock, but we have some internal assertions that check vnode * flags without acquiring the lock. Thus, these fences are INVARIANTS-only * for now. */ #ifdef INVARIANTS #define VNODE_REFCOUNT_FENCE_ACQ() atomic_thread_fence_acq() #define VNODE_REFCOUNT_FENCE_REL() atomic_thread_fence_rel() #else #define VNODE_REFCOUNT_FENCE_ACQ() #define VNODE_REFCOUNT_FENCE_REL() #endif /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased in vdropl() for VIRF_DOOMED vnode. */ static u_long __exclusive_cache_line numvnodes; SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); static counter_u64_t vnodes_created; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, "Number of vnodes created by getnewvnode"); /* * Conversion tables for conversion from vnode types to inode formats * and back. 
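 *
 * For example, the IFTOVT() and VTTOIF() macros index these tables,
 * so IFTOVT(S_IFDIR) yields VDIR and VTTOIF(VDIR) yields S_IFDIR
 * again.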
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};
int vttoif_tab[10] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT
};

/*
 * List of allocated vnodes in the system.
 */
static TAILQ_HEAD(freelst, vnode) vnode_list;
static struct vnode *vnode_list_free_marker;
static struct vnode *vnode_list_reclaim_marker;

/*
 * "Free" vnode target.  Free vnodes are rarely completely free, but are
 * just ones that are cheap to recycle.  Usually they are for files which
 * have been stat'd but not read; these usually have inode and namecache
 * data attached to them.  This target is the preferred minimum size of a
 * sub-cache consisting mostly of such files.  The system balances the size
 * of this sub-cache with its complement to try to prevent either from
 * thrashing while the other is relatively inactive.  The targets express
 * a preference for the best balance.
 *
 * "Above" this target there are 2 further targets (watermarks) related
 * to recycling of free vnodes.  In the best-operating case, the cache is
 * exactly full, the free list has size between vlowat and vhiwat above the
 * free target, and recycling from it and normal use maintains this state.
 * Sometimes the free list is below vlowat or even empty, but this state
 * is even better for immediate use provided the cache is not full.
 * Otherwise, vnlru_proc() runs to reclaim enough vnodes (usually non-free
 * ones) to reach one of these states.  The watermarks are currently hard-
 * coded as 4% and 9% of the available space higher.  These and the default
 * of 25% for wantfreevnodes are too large if the memory size is large.
 * E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim
 * whenever vnlru_proc() becomes active.
 */
static long wantfreevnodes;
static long __exclusive_cache_line freevnodes;
SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD,
    &freevnodes, 0, "Number of \"free\" vnodes");
static long freevnodes_old;

static counter_u64_t recycles_count;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count,
    "Number of vnodes recycled to meet vnode cache targets");

static counter_u64_t recycles_free_count;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_free, CTLFLAG_RD,
    &recycles_free_count,
    "Number of free vnodes recycled to meet vnode cache targets");

/*
 * Various variables used for debugging the new implementation of
 * reassignbuf().
 * XXX these are probably of (very) limited utility now.
 */
static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW | CTLFLAG_STATS,
    &reassignbufcalls, 0, "Number of calls to reassignbuf");

static counter_u64_t deferred_inact;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
    &deferred_inact,
    "Number of times inactive processing was deferred");

/* To keep more than one thread at a time from running vfs_getnewfsid */
static struct mtx mntid_mtx;

/*
 * Lock for any access to the following:
 *	vnode_list
 *	numvnodes
 *	freevnodes
 */
static struct mtx __exclusive_cache_line vnode_list_mtx;

/* Publicly exported FS */
struct nfs_public nfs_pub;

static uma_zone_t buf_trie_zone;

/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
static uma_zone_t vnode_zone;
static uma_zone_t vnodepoll_zone;

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.
To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; static struct cv sync_wakeup; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "Time to delay syncing files (in seconds)"); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "Time to delay syncing directories (in seconds)"); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "Time to delay syncing metadata (in seconds)"); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "Number of times I/O speeded up (rush requests)"); #define VDBATCH_SIZE 8 struct vdbatch { u_int index; long freevnodes; struct mtx lock; struct vnode *tab[VDBATCH_SIZE]; }; DPCPU_DEFINE_STATIC(struct vdbatch, vd); static void vdbatch_dequeue(struct vnode *vp); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* Target for maximum number of vnodes. */ u_long desiredvnodes; static u_long gapvnodes; /* gap between wanted and desired */ static u_long vhiwat; /* enough extras after expansion */ static u_long vlowat; /* minimal extras before expansion */ static u_long vstir; /* nonzero to stir non-free vnodes */ static volatile int vsmalltrigger = 8; /* pref to keep if > this many pages */ static u_long vnlru_read_freevnodes(void); /* * Note that no attempt is made to sanitize these parameters. 
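 *
 * For example, a "sysctl kern.maxvnodes=1000000" request is handled
 * by sysctl_maxvnodes() below, which also rescales wantfreevnodes to
 * a quarter of the new value and recomputes the watermarks via
 * vnlru_recalc().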
*/ static int sysctl_maxvnodes(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = desiredvnodes; error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == desiredvnodes) return (0); mtx_lock(&vnode_list_mtx); desiredvnodes = val; wantfreevnodes = desiredvnodes / 4; vnlru_recalc(); mtx_unlock(&vnode_list_mtx); /* * XXX There is no protection against multiple threads changing * desiredvnodes at the same time. Locking above only helps vnlru and * getnewvnode. */ vfs_hash_changesize(desiredvnodes); cache_changesize(desiredvnodes); return (0); } SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_maxvnodes, "UL", "Target for maximum number of vnodes"); static int sysctl_wantfreevnodes(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = wantfreevnodes; error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == wantfreevnodes) return (0); mtx_lock(&vnode_list_mtx); wantfreevnodes = val; vnlru_recalc(); mtx_unlock(&vnode_list_mtx); return (0); } SYSCTL_PROC(_vfs, OID_AUTO, wantfreevnodes, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_wantfreevnodes, "UL", "Target for minimum number of \"free\" vnodes"); SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); static int sysctl_try_reclaim_vnode(SYSCTL_HANDLER_ARGS) { struct vnode *vp; struct nameidata nd; char *buf; unsigned long ndflags; int error; if (req->newptr == NULL) return (EINVAL); if (req->newlen >= PATH_MAX) return (E2BIG); buf = malloc(PATH_MAX, M_TEMP, M_WAITOK); error = SYSCTL_IN(req, buf, req->newlen); if (error != 0) goto out; buf[req->newlen] = '\0'; ndflags = LOCKLEAF | NOFOLLOW | AUDITVNODE1 | NOCACHE | SAVENAME; NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, buf, curthread); if ((error = namei(&nd)) != 0) goto out; vp = nd.ni_vp; if (VN_IS_DOOMED(vp)) { /* * This vnode is being recycled. Return != 0 to let the caller * know that the sysctl had no effect. Return EAGAIN because a * subsequent call will likely succeed (since namei will create * a new vnode if necessary) */ error = EAGAIN; goto putvnode; } counter_u64_add(recycles_count, 1); vgone(vp); putvnode: NDFREE(&nd, 0); out: free(buf, M_TEMP); return (error); } static int sysctl_ftry_reclaim_vnode(SYSCTL_HANDLER_ARGS) { struct thread *td = curthread; struct vnode *vp; struct file *fp; int error; int fd; if (req->newptr == NULL) return (EBADF); error = sysctl_handle_int(oidp, &fd, 0, req); if (error != 0) return (error); error = getvnode(curthread, fd, &cap_fcntl_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) goto drop; counter_u64_add(recycles_count, 1); vgone(vp); VOP_UNLOCK(vp); drop: fdrop(fp, td); return (error); } SYSCTL_PROC(_debug, OID_AUTO, try_reclaim_vnode, CTLTYPE_STRING | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_try_reclaim_vnode, "A", "Try to reclaim a vnode by its pathname"); SYSCTL_PROC(_debug, OID_AUTO, ftry_reclaim_vnode, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_ftry_reclaim_vnode, "I", "Try to reclaim a vnode by its file descriptor"); /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ static int vnsz2log; /* * Support for the bufobj clean & dirty pctrie. 
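 *
 * The node allocator below must use M_NOWAIT because it can be
 * called with a bufobj lock held; vntblinit() compensates by
 * preallocating one trie node per buf so that inserts cannot fail.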
*/ static void * buf_trie_alloc(struct pctrie *ptree) { return uma_zalloc(buf_trie_zone, M_NOWAIT); } static void buf_trie_free(struct pctrie *ptree, void *node) { uma_zfree(buf_trie_zone, node); } PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); /* * Initialize the vnode management data structures. * * Reevaluate the following cap on the number of vnodes after the physical * memory size exceeds 512GB. In the limit, as the physical memory size * grows, the ratio of the memory size in KB to vnodes approaches 64:1. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX (512UL * 1024 * 1024 / 64) /* 8M */ #endif static MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); static struct vnode * vn_alloc_marker(struct mount *mp) { struct vnode *vp; vp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); vp->v_type = VMARKER; vp->v_mount = mp; return (vp); } static void vn_free_marker(struct vnode *vp) { MPASS(vp->v_type == VMARKER); free(vp, M_VNODE_MARKER); } /* * Initialize a vnode as it first enters the zone. */ static int vnode_init(void *mem, int size, int flags) { struct vnode *vp; vp = mem; bzero(vp, size); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems opt-in. */ lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE); /* * Initialize bufobj. */ bufobj_init(&vp->v_bufobj, vp); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Initialize rangelocks. */ rangelock_init(&vp->v_rl); vp->v_dbatchcpu = NOCPU; mtx_lock(&vnode_list_mtx); TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist); mtx_unlock(&vnode_list_mtx); return (0); } /* * Free a vnode when it is cleared from the zone. */ static void vnode_fini(void *mem, int size) { struct vnode *vp; struct bufobj *bo; vp = mem; vdbatch_dequeue(vp); mtx_lock(&vnode_list_mtx); TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); mtx_unlock(&vnode_list_mtx); rangelock_destroy(&vp->v_rl); lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); bo = &vp->v_bufobj; rw_destroy(BO_LOCKPTR(bo)); } /* * Provide the size of NFS nclnode and NFS fh for calculation of the * vnode memory consumption. The size is specified directly to * eliminate dependency on NFS-private header. * * Other filesystems may use bigger or smaller (like UFS and ZFS) * private inode data, but the NFS-based estimation is ample enough. * Still, we care about differences in the size between 64- and 32-bit * platforms. * * Namecache structure size is heuristically * sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1. */ #ifdef _LP64 #define NFS_NCLNODE_SZ (528 + 64) #define NC_SZ 148 #else #define NFS_NCLNODE_SZ (360 + 32) #define NC_SZ 92 #endif static void vntblinit(void *dummy __unused) { struct vdbatch *vd; int cpu, physvnodes, virtvnodes; u_int i; /* * Desiredvnodes is a function of the physical memory size and the * kernel's heap size. Generally speaking, it scales with the * physical memory size. The ratio of desiredvnodes to the physical * memory size is 1:16 until desiredvnodes exceeds 98,304. * Thereafter, the * marginal ratio of desiredvnodes to the physical memory size is * 1:64. However, desiredvnodes is limited by the kernel's heap * size. The memory required by desiredvnodes vnodes and vm objects * must not exceed 1/10th of the kernel's heap size. 
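 *
 * As a worked example (assuming 4 KB pages and ignoring maxproc):
 * with 1M physical pages, pgtok() yields 4M KB, so physvnodes is
 * roughly 4M/64 + 3 * min(98304 * 16, 4M)/64, i.e. about
 * 65536 + 73728 = 139264 vnodes, before the virtvnodes heap limit
 * is applied.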
*/ physvnodes = maxproc + pgtok(vm_cnt.v_page_count) / 64 + 3 * min(98304 * 16, pgtok(vm_cnt.v_page_count)) / 64; virtvnodes = vm_kmem_size / (10 * (sizeof(struct vm_object) + sizeof(struct vnode) + NC_SZ * ncsizefactor + NFS_NCLNODE_SZ)); desiredvnodes = min(physvnodes, virtvnodes); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %lu -> %lu\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_list); mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF); /* * The lock is taken to appease WITNESS. */ mtx_lock(&vnode_list_mtx); vnlru_recalc(); mtx_unlock(&vnode_list_mtx); vnode_list_free_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_free_marker, v_vnodelist); vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* * Preallocate enough nodes to support one-per buf so that * we can not fail an insert. reassignbuf() callers can not * tolerate the insertion failure. */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); uma_prealloc(buf_trie_zone, nbuf); vnodes_created = counter_u64_alloc(M_WAITOK); recycles_count = counter_u64_alloc(M_WAITOK); recycles_free_count = counter_u64_alloc(M_WAITOK); deferred_inact = counter_u64_alloc(M_WAITOK); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); cv_init(&sync_wakeup, "syncer"); for (i = 1; i <= sizeof(struct vnode); i <<= 1) vnsz2log++; vnsz2log--; CPU_FOREACH(cpu) { vd = DPCPU_ID_PTR((cpu), vd); bzero(vd, sizeof(*vd)); mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF); } } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Eventually, mountlist_mtx is not released on failure. * * vfs_busy() is a custom lock, it can block the caller. * vfs_busy() only sleeps if the unmount is active on the mount point. * For a mountpoint mp, vfs_busy-enforced lock is before lock of any * vnode belonging to mp. * * Lookup uses vfs_busy() to traverse mount points. * root fs var fs * / vnode lock A / vnode lock (/var) D * /var vnode lock B /log vnode lock(/var/log) E * vfs_busy lock C vfs_busy lock F * * Within each file system, the lock order is C->A->B and F->D->E. * * When traversing across mounts, the system follows that lock order: * * C->A->B * | * +->F->D->E * * The lookup() process for namei("/var") illustrates the process: * VOP_LOOKUP() obtains B while A is held * vfs_busy() obtains a shared lock on F while A and B are held * vput() releases lock on B * vput() releases lock on A * VFS_ROOT() obtains lock on D while shared lock on F is held * vfs_unbusy() releases shared lock on F * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A. * Attempt to lock A (instead of vp_crossmp) while D is held would * violate the global order, causing deadlocks. * * dounmount() locks B while F is drained. 
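 *
 * A typical consumer (sketch) looks like:
 *
 *	if (vfs_busy(mp, MBF_NOWAIT) != 0)
 *		return (ENOENT);
 *	error = VFS_ROOT(mp, LK_SHARED, &vp);
 *	...
 *	vfs_unbusy(mp);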
 */
int
vfs_busy(struct mount *mp, int flags)
{

	MPASS((flags & ~MBF_MASK) == 0);
	CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags);

	if (vfs_op_thread_enter(mp)) {
		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
		MPASS((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0);
		MPASS((mp->mnt_kern_flag & MNTK_REFEXPIRE) == 0);
		vfs_mp_count_add_pcpu(mp, ref, 1);
		vfs_mp_count_add_pcpu(mp, lockref, 1);
		vfs_op_thread_exit(mp);
		if (flags & MBF_MNTLSTLOCK)
			mtx_unlock(&mountlist_mtx);
		return (0);
	}

	MNT_ILOCK(mp);
	vfs_assert_mount_counters(mp);
	MNT_REF(mp);
	/*
	 * If mount point is currently being unmounted, sleep until the
	 * mount point fate is decided.  If thread doing the unmounting fails,
	 * it will clear MNTK_UNMOUNT flag before waking us up, indicating
	 * that this mount point has survived the unmount attempt and vfs_busy
	 * should retry.  Otherwise the unmounter thread will set MNTK_REFEXPIRE
	 * flag in addition to MNTK_UNMOUNT, indicating that mount point is
	 * about to be really destroyed.  vfs_busy needs to release its
	 * reference on the mount point in this case and return with ENOENT,
	 * telling the caller that the mount it tried to busy is no longer
	 * valid.
	 */
	while (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) {
			MNT_REL(mp);
			MNT_IUNLOCK(mp);
			CTR1(KTR_VFS, "%s: failed busying before sleeping",
			    __func__);
			return (ENOENT);
		}
		if (flags & MBF_MNTLSTLOCK)
			mtx_unlock(&mountlist_mtx);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		msleep(mp, MNT_MTX(mp), PVFS | PDROP, "vfs_busy", 0);
		if (flags & MBF_MNTLSTLOCK)
			mtx_lock(&mountlist_mtx);
		MNT_ILOCK(mp);
	}
	if (flags & MBF_MNTLSTLOCK)
		mtx_unlock(&mountlist_mtx);
	mp->mnt_lockref++;
	MNT_IUNLOCK(mp);
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(struct mount *mp)
{
	int c;

	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);

	if (vfs_op_thread_enter(mp)) {
		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
		vfs_mp_count_sub_pcpu(mp, lockref, 1);
		vfs_mp_count_sub_pcpu(mp, ref, 1);
		vfs_op_thread_exit(mp);
		return;
	}

	MNT_ILOCK(mp);
	vfs_assert_mount_counters(mp);
	MNT_REL(mp);
	c = --mp->mnt_lockref;
	if (mp->mnt_vfs_ops == 0) {
		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
		MNT_IUNLOCK(mp);
		return;
	}
	if (c < 0)
		vfs_dump_mount_counters(mp);
	if (c == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) {
		MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT);
		CTR1(KTR_VFS, "%s: waking up waiters", __func__);
		mp->mnt_kern_flag &= ~MNTK_DRAINING;
		wakeup(&mp->mnt_lockref);
	}
	MNT_IUNLOCK(mp);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid);
	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			vfs_ref(mp);
			mtx_unlock(&mountlist_mtx);
			return (mp);
		}
	}
	mtx_unlock(&mountlist_mtx);
	CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid);
	return ((struct mount *) 0);
}

/*
 * Lookup a mount point by filesystem identifier, busying it before
 * returning.
 *
 * To avoid congestion on mountlist_mtx, implement simple direct-mapped
 * cache for popular filesystem identifiers.  The cache is lockless, using
 * the fact that struct mount's are never freed.  In worst case we may
 * get pointer to unmounted or even different filesystem, so we have to
 * check what we got, and go the slow way if so.
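 *
 * For example, if a cached slot points at a struct mount that has
 * since been reused for another filesystem, the f_fsid re-check
 * after vfs_busy() fails, the busy reference is dropped again, and
 * the locked mountlist walk below takes over.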
*/ struct mount * vfs_busyfs(fsid_t *fsid) { #define FSID_CACHE_SIZE 256 typedef struct mount * volatile vmp_t; static vmp_t cache[FSID_CACHE_SIZE]; struct mount *mp; int error; uint32_t hash; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); hash = fsid->val[0] ^ fsid->val[1]; hash = (hash >> 16 ^ hash) & (FSID_CACHE_SIZE - 1); mp = cache[hash]; if (mp == NULL || mp->mnt_stat.f_fsid.val[0] != fsid->val[0] || mp->mnt_stat.f_fsid.val[1] != fsid->val[1]) goto slow; if (vfs_busy(mp, 0) != 0) { cache[hash] = NULL; goto slow; } if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); else vfs_unbusy(mp); slow: mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error) { cache[hash] = NULL; mtx_unlock(&mountlist_mtx); return (NULL); } cache[hash] = mp; return (mp); } } CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; if (jailed(td->td_ucred)) { /* * If the jail of the calling thread lacks permission for * this type of file system, deny immediately. */ if (!prison_allow(td->td_ucred, mp->mnt_vfc->vfc_prison_flag)) return (EPERM); /* * If the file system was mounted outside the jail of the * calling thread, deny immediately. */ if (prison_check(td->td_ucred, mp->mnt_cred) != 0) return (EPERM); } /* * If file system supports delegated administration, we don't check * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified * by the file system itself. * If this is not the user that did original mount, we check for * the PRIV_VFS_MOUNT_OWNER privilege. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) && mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static uint16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. 
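 *
 * For example, "sysctl vfs.timestamp_precision=3" selects full
 * nanotime(9) resolution for the timestamps handed out by
 * vfs_timestamp() below.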
*/ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_USEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, "File timestamp precision (0: seconds, " "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to us, " "3+: sec + ns (max. precision))"); /* * Get a current timestamp. */ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Try to reduce the total number of vnodes. * * This routine (and its user) are buggy in at least the following ways: * - all parameters were picked years ago when RAM sizes were significantly * smaller * - it can pick vnodes based on pages used by the vm object, but filesystems * like ZFS don't use it making the pick broken * - since ZFS has its own aging policy it gets partially combated by this one * - a dedicated method should be provided for filesystems to let them decide * whether the vnode should be recycled * * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desirable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. * * @param reclaim_nc_src Only reclaim directories with outgoing namecache * entries if this argument is strue * @param trigger Only reclaim vnodes with fewer than this many resident * pages. * @param target How many vnodes to reclaim. * @return The number of vnodes that were reclaimed. */ static int vlrureclaim(bool reclaim_nc_src, int trigger, u_long target) { struct vnode *vp, *mvp; struct mount *mp; u_long done; bool retried; mtx_assert(&vnode_list_mtx, MA_OWNED); retried = false; done = 0; mvp = vnode_list_reclaim_marker; restart: vp = mvp; while (done < target) { vp = TAILQ_NEXT(vp, v_vnodelist); if (__predict_false(vp == NULL)) break; if (__predict_false(vp->v_type == VMARKER)) continue; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. * Also skip free vnodes. We are trying to make space * to expand the free list, not reduce it. 
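		 *
		 * Each test here is repeated under the vnode interlock
		 * below, since v_usecount, v_holdcnt and the namecache
		 * lists can all change between this unlocked pre-check
		 * and VI_TRYLOCK().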
*/ if (vp->v_usecount > 0 || vp->v_holdcnt == 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src))) goto next_iter; if (vp->v_type == VBAD || vp->v_type == VNON) goto next_iter; if (!VI_TRYLOCK(vp)) goto next_iter; if (vp->v_usecount > 0 || vp->v_holdcnt == 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || vp->v_type == VBAD || vp->v_type == VNON || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } vholdl(vp); VI_UNLOCK(vp); TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); mtx_unlock(&vnode_list_mtx); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); goto next_iter_unlocked; } if (VOP_LOCK(vp, LK_EXCLUSIVE|LK_NOWAIT) != 0) { vdrop(vp); vn_finished_write(mp); goto next_iter_unlocked; } VI_LOCK(vp); if (vp->v_usecount > 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp); vdropl(vp); vn_finished_write(mp); goto next_iter_unlocked; } counter_u64_add(recycles_count, 1); vgonel(vp); VOP_UNLOCK(vp); vdropl(vp); vn_finished_write(mp); done++; next_iter_unlocked: if (should_yield()) kern_yield(PRI_USER); mtx_lock(&vnode_list_mtx); goto restart; next_iter: MPASS(vp->v_type != VMARKER); if (!should_yield()) continue; TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); mtx_unlock(&vnode_list_mtx); kern_yield(PRI_USER); mtx_lock(&vnode_list_mtx); goto restart; } if (done == 0 && !retried) { TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist); retried = true; goto restart; } return (done); } static int max_vnlru_free = 10000; /* limit on vnode free requests per call */ SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, &max_vnlru_free, 0, "limit on vnode free requests per call to the vnlru_free routine"); /* * Attempt to reduce the free list by the requested amount. */ static int vnlru_free_locked(int count, struct vfsops *mnt_op) { struct vnode *vp, *mvp; struct mount *mp; int ocount; mtx_assert(&vnode_list_mtx, MA_OWNED); if (count > max_vnlru_free) count = max_vnlru_free; ocount = count; mvp = vnode_list_free_marker; restart: vp = mvp; while (count > 0) { vp = TAILQ_NEXT(vp, v_vnodelist); if (__predict_false(vp == NULL)) { TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist); break; } if (__predict_false(vp->v_type == VMARKER)) continue; /* * Don't recycle if our vnode is from different type * of mount point. Note that mp is type-safe, the * check does not reach unmapped address even if * vnode is reclaimed. * Don't recycle if we can't get the interlock without * blocking. 
*/ if (vp->v_holdcnt > 0 || (mnt_op != NULL && (mp = vp->v_mount) != NULL && mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) { continue; } TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) { VI_UNLOCK(vp); continue; } vholdl(vp); count--; mtx_unlock(&vnode_list_mtx); VI_UNLOCK(vp); vtryrecycle(vp); vdrop(vp); mtx_lock(&vnode_list_mtx); goto restart; } return (ocount - count); } void vnlru_free(int count, struct vfsops *mnt_op) { mtx_lock(&vnode_list_mtx); vnlru_free_locked(count, mnt_op); mtx_unlock(&vnode_list_mtx); } static void vnlru_recalc(void) { mtx_assert(&vnode_list_mtx, MA_OWNED); gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ vlowat = vhiwat / 2; } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrurecycle() from the bowels of filesystem code has some * interesting deadlock problems. */ static struct proc *vnlruproc; static int vnlruproc_sig; /* * The main freevnodes counter is only updated when threads requeue their vnode * batches. CPUs are conditionally walked to compute a more accurate total. * * Limit how much of a slop are we willing to tolerate. Note: the actual value * at any given moment can still exceed slop, but it should not be by significant * margin in practice. */ #define VNLRU_FREEVNODES_SLOP 128 static u_long vnlru_read_freevnodes(void) { struct vdbatch *vd; long slop; int cpu; mtx_assert(&vnode_list_mtx, MA_OWNED); if (freevnodes > freevnodes_old) slop = freevnodes - freevnodes_old; else slop = freevnodes_old - freevnodes; if (slop < VNLRU_FREEVNODES_SLOP) return (freevnodes >= 0 ? freevnodes : 0); freevnodes_old = freevnodes; CPU_FOREACH(cpu) { vd = DPCPU_ID_PTR((cpu), vd); freevnodes_old += vd->freevnodes; } return (freevnodes_old >= 0 ? freevnodes_old : 0); } static bool vnlru_under(u_long rnumvnodes, u_long limit) { u_long rfreevnodes, space; if (__predict_false(rnumvnodes > desiredvnodes)) return (true); space = desiredvnodes - rnumvnodes; if (space < limit) { rfreevnodes = vnlru_read_freevnodes(); if (rfreevnodes > wantfreevnodes) space += rfreevnodes - wantfreevnodes; } return (space < limit); } static bool vnlru_under_unlocked(u_long rnumvnodes, u_long limit) { long rfreevnodes, space; if (__predict_false(rnumvnodes > desiredvnodes)) return (true); space = desiredvnodes - rnumvnodes; if (space < limit) { rfreevnodes = atomic_load_long(&freevnodes); if (rfreevnodes > wantfreevnodes) space += rfreevnodes - wantfreevnodes; } return (space < limit); } static void vnlru_kick(void) { mtx_assert(&vnode_list_mtx, MA_OWNED); if (vnlruproc_sig == 0) { vnlruproc_sig = 1; wakeup(vnlruproc); } } static void vnlru_proc(void) { u_long rnumvnodes, rfreevnodes, target; unsigned long onumvnodes; int done, force, trigger, usevnodes; bool reclaim_nc_src, want_reread; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, SHUTDOWN_PRI_FIRST); force = 0; want_reread = false; for (;;) { kproc_suspend_check(vnlruproc); mtx_lock(&vnode_list_mtx); rnumvnodes = atomic_load_long(&numvnodes); if (want_reread) { force = vnlru_under(numvnodes, vhiwat) ? 1 : 0; want_reread = false; } /* * If numvnodes is too large (due to desiredvnodes being * adjusted using its sysctl, or emergency growth), first * try to reduce it by discarding from the free list. 
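		 *
		 * For example, lowering kern.maxvnodes below the current
		 * vnode count makes this branch discard
		 * (rnumvnodes - desiredvnodes) free vnodes before the
		 * normal reclaim pass is considered.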
		 */
		if (rnumvnodes > desiredvnodes) {
			vnlru_free_locked(rnumvnodes - desiredvnodes, NULL);
			rnumvnodes = atomic_load_long(&numvnodes);
		}
		/*
		 * Sleep if the vnode cache is in a good state.  This is
		 * when it is not over-full and has space for about a 4%
		 * or 9% expansion (by growing its size or inexcessively
		 * reducing its free list).  Otherwise, try to reclaim
		 * space for a 10% expansion.
		 */
		if (vstir && force == 0) {
			force = 1;
			vstir = 0;
		}
		if (force == 0 && !vnlru_under(rnumvnodes, vlowat)) {
			vnlruproc_sig = 0;
			wakeup(&vnlruproc_sig);
			msleep(vnlruproc, &vnode_list_mtx,
			    PVFS|PDROP, "vlruwt", hz);
			continue;
		}
		rfreevnodes = vnlru_read_freevnodes();

		onumvnodes = rnumvnodes;
		/*
		 * Calculate parameters for recycling.  These are the same
		 * throughout the loop to give some semblance of fairness.
		 * The trigger point is to avoid recycling vnodes with lots
		 * of resident pages.  We aren't trying to free memory; we
		 * are trying to recycle or at least free vnodes.
		 */
		if (rnumvnodes <= desiredvnodes)
			usevnodes = rnumvnodes - rfreevnodes;
		else
			usevnodes = rnumvnodes;
		if (usevnodes <= 0)
			usevnodes = 1;
		/*
		 * The trigger value is chosen to give a conservatively
		 * large value to ensure that it alone doesn't prevent
		 * making progress.  The value can easily be so large that
		 * it is effectively infinite in some congested and
		 * misconfigured cases, and this is necessary.  Normally
		 * it is about 8 to 100 (pages), which is quite large.
		 */
		trigger = vm_cnt.v_page_count * 2 / usevnodes;
		if (force < 2)
			trigger = vsmalltrigger;
		reclaim_nc_src = force >= 3;
		target = rnumvnodes * (int64_t)gapvnodes / imax(desiredvnodes, 1);
		target = target / 10 + 1;
		done = vlrureclaim(reclaim_nc_src, trigger, target);
		mtx_unlock(&vnode_list_mtx);
		if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes)
			uma_reclaim(UMA_RECLAIM_DRAIN);
		if (done == 0) {
			if (force == 0 || force == 1) {
				force = 2;
				continue;
			}
			if (force == 2) {
				force = 3;
				continue;
			}
			want_reread = true;
			force = 0;
			vnlru_nowhere++;
			tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3);
		} else {
			want_reread = true;
			kern_yield(PRI_USER);
		}
	}
}

static struct kproc_desc vnlru_kp = {
	"vnlru",
	vnlru_proc,
	&vnlruproc
};
SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start,
    &vnlru_kp);

/*
 * Routines having to do with the management of the vnode table.
 */

/*
 * Try to recycle a freed vnode.  We abort if anyone picks up a reference
 * before we actually vgone().  This function must be called with the vnode
 * held to prevent the vnode from being returned to the free list midway
 * through vgone().
 */
static int
vtryrecycle(struct vnode *vp)
{
	struct mount *vnmp;

	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
	VNASSERT(vp->v_holdcnt, vp,
	    ("vtryrecycle: Recycling vp %p without a reference.", vp));
	/*
	 * This vnode may be found and locked via some other list, if so we
	 * can't recycle it yet.
	 */
	if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		CTR2(KTR_VFS,
		    "%s: impossible to recycle, vp %p lock is already held",
		    __func__, vp);
		return (EWOULDBLOCK);
	}
	/*
	 * Don't recycle if its filesystem is being suspended.
	 */
	if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) {
		VOP_UNLOCK(vp);
		CTR2(KTR_VFS,
		    "%s: impossible to recycle, cannot start the write for %p",
		    __func__, vp);
		return (EBUSY);
	}
	/*
	 * If we got this far, we need to acquire the interlock and see if
	 * anyone picked up this vnode from another list.  If not, we will
	 * mark it with DOOMED via vgonel() so that anyone who does find it
	 * will skip over it.
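	 *
	 * The race being closed: another thread may vget() the vnode
	 * between the caller's checks and the VOP_LOCK() above; in that
	 * case v_usecount is now nonzero and we must bail out with EBUSY
	 * rather than doom a vnode that is back in use.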
*/ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp); VI_UNLOCK(vp); vn_finished_write(vnmp); CTR2(KTR_VFS, "%s: impossible to recycle, %p is already referenced", __func__, vp); return (EBUSY); } if (!VN_IS_DOOMED(vp)) { counter_u64_add(recycles_free_count, 1); vgonel(vp); } VOP_UNLOCK(vp); VI_UNLOCK(vp); vn_finished_write(vnmp); return (0); } /* * Allocate a new vnode. * * The operation never returns an error. Returning an error was disabled * in r145385 (dated 2005) with the following comment: * * XXX Not all VFS_VGET/ffs_vget callers check returns. * * Given the age of this commit (almost 15 years at the time of writing this * comment) restoring the ability to fail requires a significant audit of * all codepaths. * * The routine can try to free a vnode or stall for up to 1 second waiting for * vnlru to clear things up, but ultimately always performs a M_WAITOK allocation. */ static u_long vn_alloc_cyclecount; static struct vnode * __noinline vn_alloc_hard(struct mount *mp) { u_long rnumvnodes, rfreevnodes; mtx_lock(&vnode_list_mtx); rnumvnodes = atomic_load_long(&numvnodes); if (rnumvnodes + 1 < desiredvnodes) { vn_alloc_cyclecount = 0; goto alloc; } rfreevnodes = vnlru_read_freevnodes(); if (vn_alloc_cyclecount++ >= rfreevnodes) { vn_alloc_cyclecount = 0; vstir = 1; } /* * Grow the vnode cache if it will not be above its target max * after growing. Otherwise, if the free list is nonempty, try * to reclaim 1 item from it before growing the cache (possibly * above its target max if the reclamation failed or is delayed). * Otherwise, wait for some space. In all cases, schedule * vnlru_proc() if we are getting short of space. The watermarks * should be chosen so that we never wait or even reclaim from * the free list to below its target minimum. */ if (vnlru_free_locked(1, NULL) > 0) goto alloc; if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { /* * Wait for space for a new vnode. */ vnlru_kick(); msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz); if (atomic_load_long(&numvnodes) + 1 > desiredvnodes && vnlru_read_freevnodes() > 1) vnlru_free_locked(1, NULL); } alloc: rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1; if (vnlru_under(rnumvnodes, vlowat)) vnlru_kick(); mtx_unlock(&vnode_list_mtx); return (uma_zalloc(vnode_zone, M_WAITOK)); } static struct vnode * vn_alloc(struct mount *mp) { u_long rnumvnodes; if (__predict_false(vn_alloc_cyclecount != 0)) return (vn_alloc_hard(mp)); rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1; if (__predict_false(vnlru_under_unlocked(rnumvnodes, vlowat))) { atomic_subtract_long(&numvnodes, 1); return (vn_alloc_hard(mp)); } return (uma_zalloc(vnode_zone, M_WAITOK)); } static void vn_free(struct vnode *vp) { atomic_subtract_long(&numvnodes, 1); uma_zfree(vnode_zone, vp); } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp; struct thread *td; struct lock_object *lo; CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); KASSERT(vops->registered, ("%s: not registered vector op %p\n", __func__, vops)); td = curthread; if (td->td_vp_reserved != NULL) { vp = td->td_vp_reserved; td->td_vp_reserved = NULL; } else { vp = vn_alloc(mp); } counter_u64_add(vnodes_created, 1); /* * Locks are given the generic name "vnode" when created. * Follow the historic practice of using the filesystem * name when they allocated, e.g., "zfs", "ufs", "nfs, etc. * * Locks live in a witness group keyed on their name. 
Thus, * when a lock is renamed, it must also move from the witness * group of its old name to the witness group of its new name. * * The change only needs to be made when the vnode moves * from one filesystem type to another. We ensure that each * filesystem use a single static name pointer for its tag so * that we can compare pointers rather than doing a strcmp(). */ lo = &vp->v_vnlock->lock_object; #ifdef WITNESS if (lo->lo_name != tag) { #endif lo->lo_name = tag; #ifdef WITNESS WITNESS_DESTROY(lo); WITNESS_INIT(lo, tag); } #endif /* * By default, don't allow shared locks unless filesystems opt-in. */ vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE; /* * Finalize various vnode identity bits. */ KASSERT(vp->v_object == NULL, ("stale v_object %p", vp)); KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp)); KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp)); vp->v_type = VNON; vp->v_op = vops; v_init_counters(vp); vp->v_bufobj.bo_ops = &buf_ops_bio; #ifdef DIAGNOSTIC if (mp == NULL && vops != &dead_vnodeops) printf("NULL mp in getnewvnode(9), tag %s\n", tag); #endif #ifdef MAC mac_vnode_init(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_vnode_associate_singlelabel(mp, vp); #endif if (mp != NULL) { vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } /* * For the filesystems which do not use vfs_hash_insert(), * still initialize v_hash to have vfs_hash_index() useful. * E.g., nullfs uses vfs_hash_index() on the lower vnode for * its own hashing. */ vp->v_hash = (uintptr_t)vp >> vnsz2log; *vpp = vp; return (0); } void getnewvnode_reserve(void) { struct thread *td; td = curthread; MPASS(td->td_vp_reserved == NULL); td->td_vp_reserved = vn_alloc(NULL); } void getnewvnode_drop_reserve(void) { struct thread *td; td = curthread; if (td->td_vp_reserved != NULL) { vn_free(td->td_vp_reserved); td->td_vp_reserved = NULL; } } static void freevnode(struct vnode *vp) { struct bufobj *bo; /* * The vnode has been marked for destruction, so free it. * * The vnode will be returned to the zone where it will * normally remain until it is needed for another vnode. We * need to cleanup (or verify that the cleanup has already * been done) any residual data left from its current use * so as not to contaminate the freshly allocated vnode. 
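 *
 * Each VNASSERT below spells out one piece of per-use state that the
 * previous owner must already have torn down; e.g. a filesystem that
 * leaks v_data trips the "cleaned vnode isn't" assertion.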
*/ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); bo = &vp->v_bufobj; VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, ("clean blk trie not empty")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp, ("dirty blk trie not empty")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp, ("Dangling rangelock waiters")); VI_UNLOCK(vp); #ifdef MAC mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) { destroy_vpollinfo(vp->v_pollinfo); vp->v_pollinfo = NULL; } #ifdef INVARIANTS /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif vp->v_mountedhere = NULL; vp->v_unpcb = NULL; vp->v_rdev = NULL; vp->v_fifoinfo = NULL; vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; vp->v_irflag = 0; vp->v_iflag = 0; vp->v_vflag = 0; bo->bo_flag = 0; vn_free(vp); } /* * Delete from old mount point vnode list, if on one. */ static void delmntque(struct vnode *vp) { struct mount *mp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); VI_LOCK(vp); if (vp->v_mflag & VMP_LAZYLIST) { mtx_lock(&mp->mnt_listmtx); if (vp->v_mflag & VMP_LAZYLIST) { vp->v_mflag &= ~VMP_LAZYLIST; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize--; } mtx_unlock(&mp->mnt_listmtx); } vp->v_mount = NULL; VI_UNLOCK(vp); VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp"); /* * We acquire the vnode interlock early to ensure that the * vnode cannot be recycled by another process releasing a * holdcnt on it before we get it on both the vnode list * and the active vnode list. The mount mutex protects only * manipulation of the vnode list and the vnode freelist * mutex protects only manipulation of the active vnode list. * Hence the need to hold the vnode interlock throughout. 
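 *
 * A typical caller (sketch; the "myfs" names are hypothetical) pairs
 * this with getnewvnode():
 *
 *	error = getnewvnode("myfs", mp, &myfs_vnodeops, &vp);
 *	...initialize v_data...
 *	error = insmntque(vp, mp);
 *	if (error != 0)
 *		return (error);	(vp was already destroyed by the
 *				 default destructor on failure)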
*/ MNT_ILOCK(mp); VI_LOCK(vp); if (((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 && ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || mp->mnt_nvnodelistsize == 0)) && (vp->v_vflag & VV_FORCEINSMQ) == 0) { VI_UNLOCK(vp); MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; VI_UNLOCK(vp); MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0 && !(flags & V_CLEANONLY)) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); if ((flags & V_VMIO) == 0 && bo->bo_object != NULL) { BO_UNLOCK(bo); vm_object_pip_wait_unlocked(bo->bo_object, "bovlbx"); BO_LOCK(bo); } } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); VM_OBJECT_WUNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO | V_ALLOWCLEAN)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0 && bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: flush dirty failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo) { CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); if (vp->v_object != NULL && vp->v_object->handle != vp) return (0); return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. 
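 *
 * A note on the retry contract, summarizing the callers above:
 * flushbuflist() returns EAGAIN whenever it had to drop and
 * reacquire the bufobj lock to process a buffer, so callers keep
 * invoking it until it returns 0 or a hard error, e.g.:
 *
 *      do {
 *              error = flushbuflist(&bo->bo_clean, flags, bo,
 *                  slpflag, slptimeo);
 *      } while (error == EAGAIN);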
* */ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_WLOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { /* * If we are flushing both V_NORMAL and V_ALT buffers then * do not skip any buffers. If we are flushing only V_NORMAL * buffers then skip buffers marked as BX_ALTDATA. If we are * flushing only V_ALT buffers then skip buffers not marked * as BX_ALTDATA. */ if (((flags & (V_NORMAL | V_ALT)) != (V_NORMAL | V_ALT)) && (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA) != 0) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0))) { continue; } if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp == NULL) break; nbp = gbincore(bo, lblkno); if (nbp == NULL || (nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags) break; /* nbp invalid */ } return (retval); } int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn) { struct buf *bp; int error; daddr_t lblkno; ASSERT_BO_LOCKED(bo); for (lblkno = startn;;) { again: bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno); if (bp == NULL || bp->b_lblkno >= endn || bp->b_lblkno < startn) break; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0); if (error != 0) { BO_RLOCK(bo); if (error == ENOLCK) goto again; return (error); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); lblkno = bp->b_lblkno + 1; if ((bp->b_flags & B_MANAGED) == 0) bremfree(bp); bp->b_flags |= B_RELBUF; /* * In the VMIO case, use the B_NOREUSE flag to hint that the * pages backing each buffer in the range are unlikely to be * reused. Dirty buffers will have the hint applied once * they've been written. */ if ((bp->b_flags & B_VMIO) != 0) bp->b_flags |= B_NOREUSE; brelse(bp); BO_RLOCK(bo); } return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, off_t length, int blksize) { struct buf *bp, *nbp; struct bufobj *bo; daddr_t startlbn; CTR4(KTR_VFS, "%s: vp %p with block %d:%ju", __func__, vp, blksize, (uintmax_t)length); /* * Round up to the *next* lbn. 
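 *
 * A worked example with illustrative numbers: truncating to length
 * 4097 with blksize 4096 gives startlbn = howmany(4097, 4096) = 2,
 * so block 0 and the partially valid block 1 are kept while
 * invalidation starts at block 2.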
*/ startlbn = howmany(length, blksize); ASSERT_VOP_LOCKED(vp, "vtruncbuf"); bo = &vp->v_bufobj; restart_unlocked: BO_LOCK(bo); while (v_inval_buf_range_locked(vp, bo, startlbn, INT64_MAX) == EAGAIN) ; if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart_unlocked; VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); } /* * Invalidate the cached pages of a file's buffer within the range of block * numbers [startlbn, endlbn). */ void v_inval_buf_range(struct vnode *vp, daddr_t startlbn, daddr_t endlbn, int blksize) { struct bufobj *bo; off_t start, end; ASSERT_VOP_LOCKED(vp, "v_inval_buf_range"); start = blksize * startlbn; end = blksize * endlbn; bo = &vp->v_bufobj; BO_LOCK(bo); MPASS(blksize == bo->bo_bsize); while (v_inval_buf_range_locked(vp, bo, startlbn, endlbn) == EAGAIN) ; BO_UNLOCK(bo); vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end + PAGE_SIZE - 1)); } static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, daddr_t startlbn, daddr_t endlbn) { struct buf *bp, *nbp; bool anyfreed; ASSERT_VOP_LOCKED(vp, "v_inval_buf_range_locked"); ASSERT_BO_LOCKED(bo); do { anyfreed = false; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); return (EAGAIN); } bremfree(bp); bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = true; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || nbp->b_vp != vp || (nbp->b_flags & B_DELWRI) != 0)) return (EAGAIN); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); return (EAGAIN); } bremfree(bp); bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = true; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) return (EAGAIN); } } while (anyfreed); return (0); } static void buf_vlist_remove(struct buf *bp) { struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_WLOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno); TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list. * * NOTE: xflags is passed as a constant, optimizing this inline function! 
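 *
 * For instance, the callers in this file pass the flag as a literal
 * constant -- buf_vlist_add(bp, bo, BX_VNCLEAN) in bgetvp() and
 * buf_vlist_add(bp, bo, BX_VNDIRTY) in reassignbuf() -- letting the
 * dirty/clean branch fold away when the function is inlined.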
*/ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct bufv *bv; struct buf *n; int error; ASSERT_BO_WLOCKED(bo); KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0, ("dead bo %p", bo)); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; /* * Keep the list ordered. Optimize empty list insertion. Assume * we tend to grow at the tail so lookup_le should usually be cheaper * than _ge. */ if (bv->bv_cnt == 0 || bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno) TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL) TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs); else TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs); error = BUF_PCTRIE_INSERT(&bv->bv_root, bp); if (error) panic("buf_vlist_add: Preallocated nodes insufficient."); bv->bv_cnt++; } /* * Look up a buffer using the buffer tries. */ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); } /* * Associate a buffer with a vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { struct bufobj *bo; bo = &vp->v_bufobj; ASSERT_BO_WLOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); vhold(vp); bp->b_vp = vp; bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, bo, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_vp = NULL; bp->b_bufobj = NULL; BO_UNLOCK(bo); vdrop(vp); } /* * Add an item to the syncer work queue. 
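 *
 * The wheel arithmetic, with illustrative numbers: if the syncer is
 * currently at slot syncer_delayno == 10 and a 30 second delay is
 * requested, the bufobj is hung off slot (10 + 30) & syncer_mask and
 * will be visited roughly 30 iterations from now.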
*/ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_WLOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_MPSAFE| CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; *bo = LIST_FIRST(slp); if (*bo == NULL) return (0); vp = bo2vnode(*bo); if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp); vn_finished_write(mp); BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(*bo, syncdelay); } BO_UNLOCK(*bo); vdrop(vp); mtx_lock(&sync_mtx); return (0); } static int first_printf = 1; /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *next, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; int last_work_seen; int net_worklist_len; int syncer_final_iter; int error; last_work_seen = 0; syncer_final_iter = 0; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kproc_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining... "); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since it * was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second.
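 *
 * As a summary sketch (not part of this change), shutdown walks the
 * state machine
 *
 *      SYNCER_RUNNING -> SYNCER_SHUTTING_DOWN -> SYNCER_FINAL_DELAY
 *
 * where the first transition is requested by syncer_shutdown() and
 * the second happens here, once a full wrap of the wheel finds no
 * work beyond the syncer vnodes themselves.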
*/ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } if (first_printf == 0) { /* * Drop the sync mutex, because some watchdog * drivers need to sleep while patting */ mtx_unlock(&sync_mtx); wdog_kern_pat(WD_LASTVAL); mtx_lock(&sync_mtx); } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (syncer_state != SYNCER_RUNNING || time_uptime == starttime) { thread_lock(td); sched_prio(td, PPAUSE); thread_unlock(td); } if (syncer_state != SYNCER_RUNNING) cv_timedwait(&sync_wakeup, &sync_mtx, hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) cv_timedwait(&sync_wakeup, &sync_mtx, hz); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { int ret = 0; mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); return (ret); } /* * Tell the syncer to speed up its work and run though its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { if (howto & RB_NOSYNC) return; mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_shutdown(arg, howto); } void syncer_suspend(void) { syncer_shutdown(updateproc, 0); } void syncer_resume(void) { mtx_lock(&sync_mtx); first_printf = 1; syncer_state = SYNCER_RUNNING; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_resume(updateproc); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. 
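 *
 * In outline: a buffer marked B_DELWRI moves to the vnode's dirty
 * list and schedules the bufobj on the syncer wheel with a delay
 * picked by vnode type (dirdelay, metadelay or filedelay); a clean
 * buffer moves to the clean list and, once the last dirty buffer is
 * gone, the bufobj leaves the syncer worklist.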
*/ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); } static void v_init_counters(struct vnode *vp) { VNASSERT(vp->v_type == VNON && vp->v_data == NULL && vp->v_iflag == 0, vp, ("%s called for an initialized vnode", __FUNCTION__)); ASSERT_VI_UNLOCKED(vp, __FUNCTION__); refcount_init(&vp->v_holdcnt, 1); refcount_init(&vp->v_usecount, 1); } /* * Increment si_usecount of the associated device, if any. */ static void v_incr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement si_usecount of the associated device, if any. */ static void v_decr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. VIRF_DOOMED is set if the vnode * is being destroyed. Only callers who specify LK_RETRY will * see doomed vnodes. If inactive processing was delayed in * vput try to do it here. * * usecount is manipulated using atomics without holding any locks. * * holdcnt can be manipulated using atomics without holding any locks, * except when transitioning 1<->0, in which case the interlock is held. 
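 *
 * An illustrative caller sketch -- this mirrors what vget() below
 * does -- for code that wants a use reference and the vnode lock:
 *
 *      enum vgetstate vs;
 *
 *      vs = vget_prep(vp);
 *      error = vget_finish(vp, LK_EXCLUSIVE, vs);
 *      if (error != 0)
 *              return (error); -- the prepared reference is gone
 *      ... use the locked vnode ...
 *      vput(vp);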
*/ enum vgetstate vget_prep(struct vnode *vp) { enum vgetstate vs; if (refcount_acquire_if_not_zero(&vp->v_usecount)) { vs = VGET_USECOUNT; } else { vhold(vp); vs = VGET_HOLDCNT; } return (vs); } int vget(struct vnode *vp, int flags, struct thread *td) { enum vgetstate vs; MPASS(td == curthread); vs = vget_prep(vp); return (vget_finish(vp, flags, vs)); } static int __noinline vget_finish_vchr(struct vnode *vp) { VNASSERT(vp->v_type == VCHR, vp, ("type != VCHR)")); /* * See the comment in vget_finish before usecount bump. */ if (refcount_acquire_if_not_zero(&vp->v_usecount)) { #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif return (0); } VI_LOCK(vp); if (refcount_acquire_if_not_zero(&vp->v_usecount)) { #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif VI_UNLOCK(vp); return (0); } v_incr_devcount(vp); refcount_acquire(&vp->v_usecount); VI_UNLOCK(vp); return (0); } int vget_finish(struct vnode *vp, int flags, enum vgetstate vs) { int error, old; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("%s: invalid lock operation", __func__)); if ((flags & LK_INTERLOCK) != 0) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__)); if (vs == VGET_USECOUNT) { VNASSERT(vp->v_usecount > 0, vp, ("%s: vnode without usecount when VGET_USECOUNT was passed", __func__)); } - if ((error = vn_lock(vp, flags)) != 0) { + error = vn_lock(vp, flags); + if (__predict_false(error != 0)) { if (vs == VGET_USECOUNT) vrele(vp); else vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } if (vs == VGET_USECOUNT) { return (0); } if (__predict_false(vp->v_type == VCHR)) return (vget_finish_vchr(vp)); /* * We hold the vnode. If the usecount is 0 it will be utilized to keep * the vnode around. Otherwise someone else lent their hold count and * we have to drop ours. */ old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old)); if (old != 0) { #ifdef INVARIANTS old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif } return (0); } /* * Increase the reference (use) and hold count of a vnode. * This will also remove the vnode from the free list if it is presently free. */ static void __noinline vref_vchr(struct vnode *vp, bool interlock) { /* * See the comment in vget_finish before usecount bump. */ if (!interlock) { if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); return; } VI_LOCK(vp); /* * By the time we get here the vnode might have been doomed, at * which point the 0->1 use count transition is no longer * protected by the interlock.
Since it can't bounce back to * VCHR and requires vref semantics, punt it back */ if (__predict_false(vp->v_type == VBAD)) { VI_UNLOCK(vp); vref(vp); return; } } VNASSERT(vp->v_type == VCHR, vp, ("type != VCHR)")); if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); if (!interlock) VI_UNLOCK(vp); return; } vhold(vp); v_incr_devcount(vp); refcount_acquire(&vp->v_usecount); if (!interlock) VI_UNLOCK(vp); return; } void vref(struct vnode *vp) { int old; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (__predict_false(vp->v_type == VCHR)) { vref_vchr(vp, false); return; } if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); return; } vhold(vp); /* * See the comment in vget_finish. */ old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old)); if (old != 0) { #ifdef INVARIANTS old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif } } void vrefl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (__predict_false(vp->v_type == VCHR)) { vref_vchr(vp, true); return; } vref(vp); } void vrefact(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old > 0, vp, ("%s: wrong use count %d", __func__, old)); #else refcount_acquire(&vp->v_usecount); #endif } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism is used to * stop other processes from gaining references to the vnode. This may be the * case if the caller holds the only reference. This is also useful when stale * data is acceptable as race conditions may be accounted for by some other * means. */ int vrefcnt(struct vnode *vp) { return (vp->v_usecount); } void vlazy(struct vnode *vp) { struct mount *mp; VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__)); if ((vp->v_mflag & VMP_LAZYLIST) != 0) return; mp = vp->v_mount; mtx_lock(&mp->mnt_listmtx); if ((vp->v_mflag & VMP_LAZYLIST) == 0) { vp->v_mflag |= VMP_LAZYLIST; TAILQ_INSERT_TAIL(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize++; } mtx_unlock(&mp->mnt_listmtx); } static void vdefer_inactive(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode without hold count", __func__)); if (VN_IS_DOOMED(vp)) { vdropl(vp); return; } if (vp->v_iflag & VI_DEFINACT) { VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count")); vdropl(vp); return; } if (vp->v_usecount > 0) { vp->v_iflag &= ~VI_OWEINACT; vdropl(vp); return; } vlazy(vp); vp->v_iflag |= VI_DEFINACT; VI_UNLOCK(vp); counter_u64_add(deferred_inact, 1); } static void vdefer_inactive_unlocked(struct vnode *vp) { VI_LOCK(vp); if ((vp->v_iflag & VI_OWEINACT) == 0) { vdropl(vp); return; } vdefer_inactive(vp); } enum vputx_op { VPUTX_VRELE, VPUTX_VPUT, VPUTX_VUNREF }; /* * Decrement the use and hold counts for a vnode. * * See an explanation near vget() as to why atomic operation is safe. * * XXX Some filesystems pass in an exclusively locked vnode and strongly depend * on the lock being held all the way until VOP_INACTIVE. 
This in particular * happens with UFS which adds half-constructed vnodes to the hash, where they * can be found by other code. */ static void vputx(struct vnode *vp, enum vputx_op func) { int error; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) ASSERT_VOP_LOCKED(vp, "vunref"); else if (func == VPUTX_VPUT) ASSERT_VOP_LOCKED(vp, "vput"); ASSERT_VI_UNLOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0 && vp->v_usecount > 0, vp, ("%s: wrong ref counts", __func__)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. * * If we release the last usecount we take ownership of the hold * count which provides liveness of the vnode, in which case we * have to vdrop. */ if (!refcount_release(&vp->v_usecount)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp); return; } VI_LOCK(vp); v_decr_devcount(vp); /* * By the time we got here someone else might have transitioned * the count back to > 0. */ if (vp->v_usecount > 0 || vp->v_iflag & VI_DOINGINACT) goto out; /* * Check if the fs wants to perform inactive processing. Note we * may be only holding the interlock, in which case it is possible * someone else called vgone on the vnode and ->v_data is now NULL. * Since vgone performs inactive on its own there is nothing to do * here but to drop our hold count. */ if (__predict_false(VN_IS_DOOMED(vp)) || VOP_NEED_INACTIVE(vp) == 0) goto out; /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; switch (func) { case VPUTX_VRELE: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); VI_LOCK(vp); break; case VPUTX_VPUT: error = 0; if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | LK_NOWAIT); VI_LOCK(vp); } break; case VPUTX_VUNREF: error = 0; if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_TRYUPGRADE | LK_INTERLOCK); VI_LOCK(vp); } break; } if (error == 0) { vinactive(vp); if (func != VPUTX_VUNREF) VOP_UNLOCK(vp); vdropl(vp); } else { vdefer_inactive(vp); } return; out: if (func == VPUTX_VPUT) VOP_UNLOCK(vp); vdropl(vp); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { vputx(vp, VPUTX_VRELE); } /* * Release an already locked vnode. This gives the same effect as * unlock+vrele(), but takes less time and avoids releasing and * re-acquiring the lock (as vrele() acquires the lock internally.) */ void vput(struct vnode *vp) { vputx(vp, VPUTX_VPUT); } /* * Release an exclusively locked vnode. Do not unlock the vnode lock.
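 *
 * A hedged usage sketch: unlike vput(), the caller keeps the vnode
 * lock across the call, so a typical sequence is
 *
 *      vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *      ... final use of the vnode ...
 *      vunref(vp);             -- drops the use reference only
 *      VOP_UNLOCK(vp);         -- the lock survives vunref()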
*/ void vunref(struct vnode *vp) { vputx(vp, VPUTX_VUNREF); } void vhold(struct vnode *vp) { struct vdbatch *vd; int old; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); old = atomic_fetchadd_int(&vp->v_holdcnt, 1); VNASSERT(old >= 0, vp, ("%s: wrong hold count %d", __func__, old)); if (old != 0) return; critical_enter(); vd = DPCPU_PTR(vd); vd->freevnodes--; critical_exit(); } void vholdl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vhold(vp); } void vholdnz(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, 1); VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old)); #else atomic_add_int(&vp->v_holdcnt, 1); #endif } static void __noinline vdbatch_process(struct vdbatch *vd) { struct vnode *vp; int i; mtx_assert(&vd->lock, MA_OWNED); MPASS(curthread->td_pinned > 0); MPASS(vd->index == VDBATCH_SIZE); mtx_lock(&vnode_list_mtx); critical_enter(); freevnodes += vd->freevnodes; for (i = 0; i < VDBATCH_SIZE; i++) { vp = vd->tab[i]; TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist); MPASS(vp->v_dbatchcpu != NOCPU); vp->v_dbatchcpu = NOCPU; } mtx_unlock(&vnode_list_mtx); - critical_exit(); vd->freevnodes = 0; bzero(vd->tab, sizeof(vd->tab)); vd->index = 0; + critical_exit(); } static void vdbatch_enqueue(struct vnode *vp) { struct vdbatch *vd; ASSERT_VI_LOCKED(vp, __func__); VNASSERT(!VN_IS_DOOMED(vp), vp, ("%s: deferring requeue of a doomed vnode", __func__)); critical_enter(); vd = DPCPU_PTR(vd); vd->freevnodes++; if (vp->v_dbatchcpu != NOCPU) { VI_UNLOCK(vp); critical_exit(); return; } sched_pin(); critical_exit(); mtx_lock(&vd->lock); MPASS(vd->index < VDBATCH_SIZE); MPASS(vd->tab[vd->index] == NULL); /* * A hack: we depend on being pinned so that we know what to put in * ->v_dbatchcpu. */ vp->v_dbatchcpu = curcpu; vd->tab[vd->index] = vp; vd->index++; VI_UNLOCK(vp); if (vd->index == VDBATCH_SIZE) vdbatch_process(vd); mtx_unlock(&vd->lock); sched_unpin(); } /* * This routine must only be called for vnodes which are about to be * deallocated. Supporting dequeue for arbitrary vnodes would require * validating that the locked batch matches. */ static void vdbatch_dequeue(struct vnode *vp) { struct vdbatch *vd; int i; short cpu; VNASSERT(vp->v_type == VBAD || vp->v_type == VNON, vp, ("%s: called for a used vnode\n", __func__)); cpu = vp->v_dbatchcpu; if (cpu == NOCPU) return; vd = DPCPU_ID_PTR(cpu, vd); mtx_lock(&vd->lock); for (i = 0; i < vd->index; i++) { if (vd->tab[i] != vp) continue; vp->v_dbatchcpu = NOCPU; vd->index--; vd->tab[i] = vd->tab[vd->index]; vd->tab[vd->index] = NULL; break; } mtx_unlock(&vd->lock); /* * Either we dequeued the vnode above or the target CPU beat us to it. */ MPASS(vp->v_dbatchcpu == NOCPU); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we place it on the free list unless it has been vgone'd * (marked VIRF_DOOMED) in which case we will free it. * * Because the vnode vm object keeps a hold reference on the vnode if * there is at least one resident non-cached page, the vnode cannot * leave the active list without the page cleanup done. */ static void vdrop_deactivate(struct vnode *vp) { struct mount *mp; ASSERT_VI_LOCKED(vp, __func__); /* * Mark a vnode as free: remove it from its active list * and put it up for recycling on the freelist.
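 *
 * Note, summarizing the code above: the requeue itself is deferred
 * through the per-CPU vdbatch machinery, so a vnode whose hold count
 * reaches zero only moves to the tail of the global vnode list once
 * its batch fills up and vdbatch_process() runs.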
*/ VNASSERT(!VN_IS_DOOMED(vp), vp, ("vdrop: returning doomed vnode")); VNASSERT(vp->v_op != NULL, vp, ("vdrop: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with VI_OWEINACT set")); VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("vnode with VI_DEFINACT set")); if (vp->v_mflag & VMP_LAZYLIST) { mp = vp->v_mount; mtx_lock(&mp->mnt_listmtx); VNASSERT(vp->v_mflag & VMP_LAZYLIST, vp, ("lost VMP_LAZYLIST")); /* * Don't remove the vnode from the lazy list if another thread * has increased the hold count. It may have re-enqueued the * vnode to the lazy list and is now responsible for its * removal. */ if (vp->v_holdcnt == 0) { vp->v_mflag &= ~VMP_LAZYLIST; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize--; } mtx_unlock(&mp->mnt_listmtx); } vdbatch_enqueue(vp); } void vdrop(struct vnode *vp) { ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (refcount_release_if_not_last(&vp->v_holdcnt)) return; VI_LOCK(vp); vdropl(vp); } void vdropl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (!refcount_release(&vp->v_holdcnt)) { VI_UNLOCK(vp); return; } if (VN_IS_DOOMED(vp)) { freevnode(vp); return; } vdrop_deactivate(vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. */ static void vinactivef(struct vnode *vp) { struct vm_object *obj; ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); /* * Before moving off the active list, we must be sure that any * modified pages are converted into the vnode's dirty * buffers, since these will no longer be checked once the * vnode is on the inactive list. * * The write-out of the dirty pages is asynchronous. At the * point that VOP_INACTIVE() is called, there could still be * pending I/O and dirty pages in the object. */ if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && vm_object_mightbedirty(obj)) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, 0); VM_OBJECT_WUNLOCK(obj); } VOP_INACTIVE(vp, curthread); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } void vinactive(struct vnode *vp) { ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if ((vp->v_iflag & VI_OWEINACT) == 0) return; if (vp->v_iflag & VI_DOINGINACT) return; if (vp->v_usecount > 0) { vp->v_iflag &= ~VI_OWEINACT; return; } vinactivef(vp); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush(, td) * will call vrele() on the root vnode exactly rootrefs times. 
* If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes"); #endif int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp, rootrefs, flags); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) { CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d", __func__, error); return (error); } vput(rootvp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { vholdl(vp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE); if (error) { vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } /* * Skip over vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp); vdrop(vp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); if (error != 0) { VOP_UNLOCK(vp); vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount <= 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp); vdropl(vp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vn_printf(vp, "vflush: busy vnode "); #endif } VOP_UNLOCK(vp); vdropl(vp); } if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK); vgone(rootvp); VOP_UNLOCK(rootvp); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) { CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__, busy); return (EBUSY); } for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp) { int recycled; VI_LOCK(vp); recycled = vrecyclel(vp); VI_UNLOCK(vp); return (recycled); } /* * vrecycle, with the vp interlock held. */ int vrecyclel(struct vnode *vp) { int recycled; ASSERT_VOP_ELOCKED(vp, __func__); ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); recycled = 0; if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse.
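 *
 * An illustrative sketch, mirroring vrecyclel() above: the caller
 * holds the vnode lock and a hold count, and the vnode comes out
 * doomed with dead_vnodeops installed:
 *
 *      ASSERT_VOP_ELOCKED(vp, "caller");
 *      vgone(vp);      -- or vgonel(vp) with the interlock held
 *      ... vp->v_op == &dead_vnodeops, VIRF_DOOMED is set ...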
*/ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } static void notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* * Notify upper mounts about reclaimed or unlinked vnode. */ void vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; mp = vp->v_mount; if (mp == NULL) return; if (TAILQ_EMPTY(&mp->mnt_uppers)) return; mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); mmp->mnt_op = &vgonel_vfsops; mmp->mnt_kern_flag |= MNTK_MARKER; MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_VGONE_UPPER; for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { ump = TAILQ_NEXT(ump, mnt_upper_link); continue; } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); switch (event) { case VFS_NOTIFY_UPPER_RECLAIM: VFS_RECLAIM_LOWERVP(ump, vp); break; case VFS_NOTIFY_UPPER_UNLINK: VFS_UNLINK_LOWERVP(ump, vp); break; default: KASSERT(0, ("invalid event %d", event)); break; } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); } free(mmp, M_TEMP); mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; wakeup(&mp->mnt_uppers); } MNT_IUNLOCK(mp); } /* * vgone, with the vp interlock held. */ static void vgonel(struct vnode *vp) { struct thread *td; struct mount *mp; vm_object_t object; bool active, oweinact; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_irflag & VIRF_DOOMED) return; vp->v_irflag |= VIRF_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount > 0; oweinact = (vp->v_iflag & VI_OWEINACT) != 0; /* * If we need to do inactive VI_OWEINACT will be set. */ if (vp->v_iflag & VI_DEFINACT) { VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count")); vp->v_iflag &= ~VI_DEFINACT; vdropl(vp); } else { VNASSERT(vp->v_holdcnt > 0, vp, ("vnode without hold count")); VI_UNLOCK(vp); } vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); vinactivef(vp); VI_UNLOCK(vp); } if (vp->v_type == VSOCK) vfs_unp_reclaim(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) { while (vinvalbuf(vp, 0, 0, 0) != 0) ; } BO_LOCK(&vp->v_bufobj); KASSERT(TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd) && vp->v_bufobj.bo_dirty.bv_cnt == 0 && TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) && vp->v_bufobj.bo_clean.bv_cnt == 0, ("vp %p bufobj not invalidated", vp)); /* * For VMIO bufobj, BO_DEAD is set later, or in * vm_object_terminate() after the object's page queue is * flushed. */ object = vp->v_bufobj.bo_object; if (object == NULL) vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); /* * Handle the VM part. Tmpfs handles v_object on its own (the * OBJT_VNODE check). 
Nullfs or other bypassing filesystems * should not touch the object borrowed from the lower vnode * (the handle check). */ if (object != NULL && object->type == OBJT_VNODE && object->handle == vp) vnode_destroy_vobject(vp); /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p", vp)); /* * Clear the advisory locks and wake up waiting threads. */ (void)VOP_ADVLOCKPURGE(vp); vp->v_lockf = NULL; /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("type %s\n", typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d", vp->v_usecount, vp->v_writecount, vp->v_holdcnt); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); break; case VCHR: printf(" rdev %p\n", vp->v_rdev); break; case VSOCK: printf(" socket %p\n", vp->v_unpcb); break; case VFIFO: printf(" fifoinfo %p\n", vp->v_fifoinfo); break; default: printf("\n"); break; } buf[0] = '\0'; buf[1] = '\0'; if (vp->v_irflag & VIRF_DOOMED) strlcat(buf, "|VIRF_DOOMED", sizeof(buf)); flags = vp->v_irflag & ~(VIRF_DOOMED); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VIRF(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_ETERNALDEV) strlcat(buf, "|VV_ETERNALDEV", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_VMSIZEVNLOCK) strlcat(buf, "|VV_VMSIZEVNLOCK", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); if (vp->v_vflag & VV_FORCEINSMQ) strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf)); if (vp->v_vflag & VV_READLINK) strlcat(buf, "|VV_READLINK", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV | VV_CACHEDLABEL | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_TEXT_REF) strlcat(buf, "|VI_TEXT_REF", sizeof(buf)); if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_DOINGINACT) strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & 
VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); if (vp->v_iflag & VI_DEFINACT) strlcat(buf, "|VI_DEFINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_DOINGINACT | VI_OWEINACT | VI_DEFINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_mflag & VMP_LAZYLIST) strlcat(buf, "|VMP_LAZYLIST", sizeof(buf)); flags = vp->v_mflag & ~(VMP_LAZYLIST); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d " "cleanbuf %d dirtybuf %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count, vp->v_bufobj.bo_clean.bv_cnt, vp->v_bufobj.bo_dirty.bv_cnt); printf(" "); lockmgr_printinfo(vp->v_vnlock); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ db_printf("Locked vnodes\n"); TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) vn_printf(vp, "vnode "); } } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } /* * Show details about the given mount point. */ DB_SHOW_COMMAND(mount, db_show_mount) { struct mount *mp; struct vfsopt *opt; struct statfs *sp; struct vnode *vp; char buf[512]; uint64_t mflags; u_int flags; if (!have_addr) { /* No address given, print short info about all mount points. 
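 *
 * Usage sketch from the debugger prompt: "show mount" with no
 * address prints the one-line summary per mount point below, while
 * "show mount <addr>" prints the full details that follow.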
*/ TAILQ_FOREACH(mp, &mountlist, mnt_list) { db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); if (db_pager_quit) break; } db_printf("\nMore info: show mount <addr>\n"); return; } mp = (struct mount *)addr; db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); buf[0] = '\0'; mflags = mp->mnt_flag; #define MNT_FLAG(flag) do { \ if (mflags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 4, sizeof(buf)); \ mflags &= ~(flag); \ } \ } while (0) MNT_FLAG(MNT_RDONLY); MNT_FLAG(MNT_SYNCHRONOUS); MNT_FLAG(MNT_NOEXEC); MNT_FLAG(MNT_NOSUID); MNT_FLAG(MNT_NFS4ACLS); MNT_FLAG(MNT_UNION); MNT_FLAG(MNT_ASYNC); MNT_FLAG(MNT_SUIDDIR); MNT_FLAG(MNT_SOFTDEP); MNT_FLAG(MNT_NOSYMFOLLOW); MNT_FLAG(MNT_GJOURNAL); MNT_FLAG(MNT_MULTILABEL); MNT_FLAG(MNT_ACLS); MNT_FLAG(MNT_NOATIME); MNT_FLAG(MNT_NOCLUSTERR); MNT_FLAG(MNT_NOCLUSTERW); MNT_FLAG(MNT_SUJ); MNT_FLAG(MNT_EXRDONLY); MNT_FLAG(MNT_EXPORTED); MNT_FLAG(MNT_DEFEXPORTED); MNT_FLAG(MNT_EXPORTANON); MNT_FLAG(MNT_EXKERB); MNT_FLAG(MNT_EXPUBLIC); MNT_FLAG(MNT_LOCAL); MNT_FLAG(MNT_QUOTA); MNT_FLAG(MNT_ROOTFS); MNT_FLAG(MNT_USER); MNT_FLAG(MNT_IGNORE); MNT_FLAG(MNT_UPDATE); MNT_FLAG(MNT_DELEXPORT); MNT_FLAG(MNT_RELOAD); MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); #undef MNT_FLAG if (mflags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%016jx", mflags); } db_printf(" mnt_flag = %s\n", buf); buf[0] = '\0'; flags = mp->mnt_kern_flag; #define MNT_KERN_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 5, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_KERN_FLAG(MNTK_UNMOUNTF); MNT_KERN_FLAG(MNTK_ASYNC); MNT_KERN_FLAG(MNTK_SOFTDEP); MNT_KERN_FLAG(MNTK_DRAINING); MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_NO_IOPF); MNT_KERN_FLAG(MNTK_VGONE_UPPER); MNT_KERN_FLAG(MNTK_VGONE_WAITER); MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); MNT_KERN_FLAG(MNTK_SUSPEND); MNT_KERN_FLAG(MNTK_SUSPEND2); MNT_KERN_FLAG(MNTK_SUSPENDED); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); MNT_KERN_FLAG(MNTK_NOKNOTE); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_kern_flag = %s\n", buf); db_printf(" mnt_opt = "); opt = TAILQ_FIRST(mp->mnt_opt); if (opt != NULL) { db_printf("%s", opt->name); opt = TAILQ_NEXT(opt, link); while (opt != NULL) { db_printf(", %s", opt->name); opt = TAILQ_NEXT(opt, link); } } db_printf("\n"); sp = &mp->mnt_stat; db_printf(" mnt_stat = { version=%u type=%u flags=0x%016jx " "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju " "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju " "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n", (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags, (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize, (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree, (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files, (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites, (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads, (uintmax_t)sp->f_asyncreads,
(u_int)sp->f_namemax, (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]); db_printf(" mnt_cred = { uid=%u ruid=%u", (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); if (jailed(mp->mnt_cred)) db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); db_printf(" }\n"); db_printf(" mnt_ref = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref); db_printf(" mnt_gen = %d\n", mp->mnt_gen); db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); db_printf(" mnt_lazyvnodelistsize = %d\n", mp->mnt_lazyvnodelistsize); db_printf(" mnt_writeopcount = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT), mp->mnt_writeopcount); db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); db_printf(" mnt_lockref = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF), mp->mnt_lockref); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); db_printf(" mnt_gjprovider = %s\n", mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); db_printf(" mnt_vfs_ops = %d\n", mp->mnt_vfs_ops); db_printf("\n\nList of active vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && vp->v_holdcnt > 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } db_printf("\n\nList of inactive vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && vp->v_holdcnt == 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static int vfsconf2x(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp.vfc_vfsops = NULL; xvfsp.vfc_next = NULL; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #ifdef COMPAT_FREEBSD32 struct xvfsconf32 { uint32_t vfc_vfsops; char vfc_name[MFSNAMELEN]; int32_t vfc_typenum; int32_t vfc_refcount; int32_t vfc_flags; uint32_t vfc_next; }; static int vfsconf2x32(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf32 xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #endif /* * Top level filesystem related information gathering. 
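 *
 * For illustration: the vfs.conflist OID defined below exports the
 * registered filesystems as an array of struct xvfsconf records;
 * userland tools such as lsvfs(1) are believed to be the consumers.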
*/ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; int error; error = 0; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) error = vfsconf2x32(req, vfsp); else #endif error = vfsconf2x(req, vfsp); if (error) break; } vfsconf_sunlock(); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; log(LOG_WARNING, "userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_typenum == name[2]) break; } vfsconf_sunlock(); if (vfsp == NULL) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) return (vfsconf2x32(req, vfsp)); else #endif return (vfsconf2x(req, vfsp)); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error != 0) { vfsconf_sunlock(); return (error); } } vfsconf_sunlock(); return (0); } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. 
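 * The estimate below only needs to be good enough for userland to size
 * a buffer; KINFO_VNODESLOP entries of slack absorb vnodes created
 * between the sizing probe and the copyout. A consumer would follow the
 * usual two-call sysctl pattern (sketch only, assuming this handler
 * were compiled in):
 *
 *	size_t len = 0;
 *	sysctlbyname("kern.vnode", NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctlbyname("kern.vnode", buf, &len, NULL, 0);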
*/ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif static void unmount_or_warn(struct mount *mp) { int error; error = dounmount(mp, MNT_FORCE, curthread); if (error != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp, *tmp; CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); /* * Since this only runs when rebooting, it is not interlocked. */ TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) { vfs_ref(mp); /* * Forcibly unmounting "/dev" before "/" would prevent clean * unmount of the latter. */ if (mp == rootdevmp) continue; unmount_or_warn(mp); } if (rootdevmp != NULL) unmount_or_warn(rootdevmp); } static void vfs_deferred_inactive(struct vnode *vp, int lkflags) { ASSERT_VI_LOCKED(vp, __func__); VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("VI_DEFINACT still set")); if ((vp->v_iflag & VI_OWEINACT) == 0) { vdropl(vp); return; } if (vn_lock(vp, lkflags) == 0) { VI_LOCK(vp); vinactive(vp); VOP_UNLOCK(vp); vdropl(vp); return; } vdefer_inactive_unlocked(vp); } static int vfs_periodic_inactive_filter(struct vnode *vp, void *arg) { return (vp->v_iflag & VI_DEFINACT); } static void __noinline vfs_periodic_inactive(struct mount *mp, int flags) { struct vnode *vp, *mvp; int lkflags; lkflags = LK_EXCLUSIVE | LK_INTERLOCK; if (flags != MNT_WAIT) lkflags |= LK_NOWAIT; MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_inactive_filter, NULL) { if ((vp->v_iflag & VI_DEFINACT) == 0) { VI_UNLOCK(vp); continue; } vp->v_iflag &= ~VI_DEFINACT; vfs_deferred_inactive(vp, lkflags); } } static inline bool vfs_want_msync(struct vnode *vp) { struct vm_object *obj; /* * This test may be performed without any locks held. * We rely on vm_object's type stability. 
*/ if (vp->v_vflag & VV_NOSYNC) return (false); obj = vp->v_object; return (obj != NULL && vm_object_mightbedirty(obj)); } static int vfs_periodic_msync_inactive_filter(struct vnode *vp, void *arg __unused) { if (vp->v_vflag & VV_NOSYNC) return (false); if (vp->v_iflag & VI_DEFINACT) return (true); return (vfs_want_msync(vp)); } static void __noinline vfs_periodic_msync_inactive(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; struct thread *td; int lkflags, objflags; bool seen_defer; td = curthread; lkflags = LK_EXCLUSIVE | LK_INTERLOCK; if (flags != MNT_WAIT) { lkflags |= LK_NOWAIT; objflags = OBJPC_NOSYNC; } else { objflags = OBJPC_SYNC; } MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_msync_inactive_filter, NULL) { seen_defer = false; if (vp->v_iflag & VI_DEFINACT) { vp->v_iflag &= ~VI_DEFINACT; seen_defer = true; } if (!vfs_want_msync(vp)) { if (seen_defer) vfs_deferred_inactive(vp, lkflags); else VI_UNLOCK(vp); continue; } if (vget(vp, lkflags, td) == 0) { obj = vp->v_object; if (obj != NULL && (vp->v_vflag & VV_NOSYNC) == 0) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, objflags); VM_OBJECT_WUNLOCK(obj); } vput(vp); if (seen_defer) vdrop(vp); } else { if (seen_defer) vdefer_inactive_unlocked(vp); } } } void vfs_periodic(struct mount *mp, int flags) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0) vfs_periodic_inactive(mp, flags); else vfs_periodic_msync_inactive(mp, flags); } static void destroy_vpollinfo_free(struct vpollinfo *vi) { knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); uma_zfree(vnodepoll_zone, vi); } static void destroy_vpollinfo(struct vpollinfo *vi) { knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); } /* * Initialize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; if (vp->v_pollinfo != NULL) return; vi = uma_zalloc(vnodepoll_zone, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); destroy_vpollinfo_free(vi); return; } vp->v_pollinfo = vi; VI_UNLOCK(vp); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return (events); } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return (0); } /* * Routine to create and manage a filesystem syncer vnode.
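 *
 * Each mount carries at most one syncer vnode (mnt_syncer). It sits on
 * the syncer worklist, and when the syncer thread reaches it, the
 * VOP_FSYNC on it (sync_fsync below) performs a lazy
 * VFS_SYNC(mp, MNT_LAZY) of the whole filesystem before requeueing
 * itself at the back of the list.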
*/ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_need_inactive = vop_stdneed_inactive, /* need_inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; VFS_VOP_VECTOR_REGISTER(sync_vnodeops); /* * Create a new filesystem syncer vnode for the specified mount point. */ void vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; struct bufobj *bo; static long start, incr, next; int error; /* Allocate a new vnode */ error = getnewvnode("syncer", mp, &sync_vnodeops, &vp); if (error != 0) panic("vfs_allocate_syncvnode: getnewvnode() failed"); vp->v_type = VNON; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; VOP_UNLOCK(vp); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } bo = &vp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; if (mp->mnt_syncer == NULL) { mp->mnt_syncer = vp; vp = NULL; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); if (vp != NULL) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } } void vfs_deallocate_syncvnode(struct mount *mp) { struct vnode *vp; mtx_lock(&sync_mtx); vp = mp->mnt_syncer; if (vp != NULL) mp->mnt_syncer = NULL; mtx_unlock(&sync_mtx); if (vp != NULL) vrele(vp); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; int error, save; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ if (vfs_busy(mp, MBF_NOWAIT) != 0) return (0); if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp); return (0); } save = curthread_pflags_set(TDP_SYNCIO); /* * The filesystem at hand may be idle with free vnodes stored in the * batch. Return them instead of letting them stay there indefinitely. */ vfs_periodic(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY); curthread_pflags_restore(save); vn_finished_write(mp); vfs_unbusy(mp); return (error); } /* * The syncer vnode is no longer referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx.
*/ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; bo = &vp->v_bufobj; BO_LOCK(bo); mtx_lock(&sync_mtx); if (vp->v_mount->mnt_syncer == vp) vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; bo->bo_flag &= ~BO_ONWORKLST; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); return (0); } int vn_need_pageq_flush(struct vnode *vp) { struct vm_object *obj; int need; MPASS(mtx_owned(VI_MTX(vp))); need = 0; if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && vm_object_mightbedirty(obj)) need = 1; return (need); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; if (vp->v_type != VCHR) { error = ENOTBLK; goto out; } error = 0; dev_lock(); if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); out: if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused) { accmode_t dac_granted; accmode_t priv_granted; KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE), ("VAPPEND without VWRITE")); /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. */ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP)) priv_granted |= VEXEC; } else { /* * Ensure that at least one execute bit is on. Otherwise, * a privileged user will always succeed, and we don't want * this to happen unless the file really is executable. 
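 * Concretely (illustrative values): for a VREG file with mode 0644, a
 * VEXEC request fails with EACCES even for a credential holding
 * PRIV_VFS_EXEC, since no execute bit is set anywhere; with mode 0700
 * the privilege check applies and the request can succeed.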
*/ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 && !priv_check_cred(cred, PRIV_VFS_EXEC)) priv_granted |= VEXEC; } if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ)) priv_granted |= VREAD; if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE)) priv_granted |= (VWRITE | VAPPEND); if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN)) priv_granted |= VADMIN; if ((accmode & (priv_granted | dac_granted)) == accmode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((accmode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, accmode_t accmode) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, accmode, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to suppress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (KERNEL_PANICKED() || (vp) == NULL || \ (vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, "Drop into debugger on lock violation"); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, "Check for interlock across VOPs"); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); int vfs_badlock_vnode = 1; /* Print vnode details on lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_vnode, CTLFLAG_RW, &vfs_badlock_vnode, 0, "Print vnode details on lock violations"); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. 
*/ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, "Print backtrace at lock violations"); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_vnode) vn_printf(vp, "vnode "); if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { int locked; if (!IGNORE_LOCK(vp)) { locked = VOP_ISLOCKED(vp); if (locked == 0 || locked == LK_EXCLOTHER) vfs_badlock("is not locked but should be", str, vp); } } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #endif /* DEBUG_VFS_LOCKS */ void vop_rename_fail(struct vop_rename_args *ap) { if (ap->a_tvp != NULL) vput(ap->a_tvp); if (ap->a_tdvp == ap->a_tvp) vrele(ap->a_tdvp); else vput(ap->a_tdvp); vrele(ap->a_fdvp); vrele(ap->a_fvp); } void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock && (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock)) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } #ifdef DEBUG_VFS_LOCKS void vop_strategy_pre(void *ap) { struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. 
*/ if ((bp->b_flags & B_CLUSTER) != 0) return; if (!KERNEL_PANICKED() && !BUF_ISLOCKED(bp)) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } } void vop_lock_pre(void *ap) { struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_lock_post(void *ap, int rc) { struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_unlock_pre(void *ap) { struct vop_unlock_args *a = ap; ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); } void vop_unlock_post(void *ap, int rc) { return; } void vop_need_inactive_pre(void *ap) { struct vop_need_inactive_args *a = ap; ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); } void vop_need_inactive_post(void *ap, int rc) { struct vop_need_inactive_args *a = ap; ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); } #endif void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_deleteextattr_post(void *ap, int rc) { struct vop_deleteextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_reclaim_post(void *ap, int rc) { struct vop_reclaim_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_REVOKE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; long hint; if (!rc) { hint = NOTE_WRITE; if (a->a_fdvp == a->a_tdvp) { if (a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } else { hint |= NOTE_EXTEND; if (a->a_fvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); if (a->a_fvp->v_type == VDIR && a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint &= ~NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_setextattr_post(void *ap, int rc) { struct vop_setextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_open_post(void *ap, int rc) { struct vop_open_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); } void vop_close_post(void *ap, int rc) { struct 
vop_close_args *a = ap; if (!rc && (a->a_cred != NOCRED || /* filter out revokes */ !VN_IS_DOOMED(a->a_vp))) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); } } void vop_read_post(void *ap, int rc) { struct vop_read_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } void vop_readdir_post(void *ap, int rc) { struct vop_readdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init_mtx(&fs_knlist, NULL); } /* XXX - correct order? */ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { .f_isfd = 0, .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? 
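 * The value below packs the boot-relative uptime into 64 bits: whole
 * seconds in the high word and the top half of the fractional part in
 * the low word, i.e. roughly
 *
 *	rev = ((u_quad_t)bt.sec << 32) | (bt.frac >> 32);
 *
 * which is monotonically increasing for the lifetime of the boot.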
*/ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread }; static struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite }; static struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp); } static void vfs_knl_assert_locked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked"); #endif } static void vfs_knl_assert_unlocked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked"); #endif } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; vhold(vp); knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); vdrop(vp); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; int res; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { VI_LOCK(vp); kn->kn_flags |= (EV_EOF | EV_ONESHOT); VI_UNLOCK(vp); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred)) return (0); VI_LOCK(vp); kn->kn_data = va.va_size - kn->kn_fp->f_offset; res = (kn->kn_sfflags & NOTE_FILE_POLL) != 0 || kn->kn_data != 0; VI_UNLOCK(vp); return (res); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; VI_LOCK(vp); /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; VI_UNLOCK(vp); return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; int res; VI_LOCK(vp); if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { kn->kn_flags |= EV_EOF; VI_UNLOCK(vp); return (1); } res = (kn->kn_fflags != 0); VI_UNLOCK(vp); return (res); } /* * Returns whether the directory is empty or not. * If it is empty, the return value is 0; otherwise * the return value is an error value (which may * be ENOTEMPTY). 
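 * A filesystem could gate directory removal on this helper; a minimal
 * sketch of a hypothetical VOP_RMDIR fragment:
 *
 *	error = vfs_emptydir(vp);
 *	if (error != 0)
 *		return (error);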
*/ int vfs_emptydir(struct vnode *vp) { struct uio uio; struct iovec iov; struct dirent *dirent, *dp, *endp; int error, eof; error = 0; eof = 0; ASSERT_VOP_LOCKED(vp, "vfs_emptydir"); dirent = malloc(sizeof(struct dirent), M_TEMP, M_WAITOK); iov.iov_base = dirent; iov.iov_len = sizeof(struct dirent); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = sizeof(struct dirent); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = curthread; while (eof == 0 && error == 0) { error = VOP_READDIR(vp, &uio, curthread->td_ucred, &eof, NULL, NULL); if (error != 0) break; endp = (void *)((uint8_t *)dirent + sizeof(struct dirent) - uio.uio_resid); for (dp = dirent; dp < endp; dp = (void *)((uint8_t *)dp + GENERIC_DIRSIZ(dp))) { if (dp->d_type == DT_WHT) continue; if (dp->d_namlen == 0) continue; if (dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN) { error = ENOTEMPTY; break; } if (dp->d_namlen > 2) { error = ENOTEMPTY; break; } if (dp->d_namlen == 1 && dp->d_name[0] != '.') { error = ENOTEMPTY; break; } if (dp->d_namlen == 2 && dp->d_name[1] != '.') { error = ENOTEMPTY; break; } uio.uio_resid = sizeof(struct dirent); } } free(dirent, M_TEMP); return (error); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; *ap->a_ncookies += 1; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting * va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct ucred *cred) { struct mount *mp; mp = vp->v_mount; ASSERT_VOP_LOCKED(vp, "vfs_mark_atime"); if (mp != NULL && (mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) (void)VOP_MARKATIME(vp); } /* * The purpose of this routine is to remove granularity from accmode_t, * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE, * VADMIN and VAPPEND. * * If it returns 0, the caller is supposed to continue with the usual * access checks using 'accmode' as modified by this routine. If it * returns nonzero value, the caller is supposed to return that value * as errno. * * Note that after this routine runs, accmode may be zero. */ int vfs_unixify_accmode(accmode_t *accmode) { /* * There is no way to specify explicit "deny" rule using * file mode or POSIX.1e ACLs. */ if (*accmode & VEXPLICIT_DENY) { *accmode = 0; return (0); } /* * None of these can be translated into usual access bits. * Also, the common case for NFSv4 ACLs is to not contain * either of these bits. Caller should check for VWRITE * on the containing directory instead. */ if (*accmode & (VDELETE_CHILD | VDELETE)) return (EPERM); if (*accmode & VADMIN_PERMS) { *accmode &= ~VADMIN_PERMS; *accmode |= VADMIN; } /* * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL * or VSYNCHRONIZE using file mode or POSIX.1e ACL. 
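 * Worked examples (illustrative): VADMIN_PERMS | VREAD maps to
 * VADMIN | VREAD and returns 0; VDELETE or VDELETE_CHILD returns EPERM;
 * VEXPLICIT_DENY alone zeroes accmode and returns 0, leaving the caller
 * with nothing further to check.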
*/ *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE); return (0); } /* * Clear out a doomed vnode (if any) and replace it with a new one as long * as the fs is not being unmounted. Return the root vnode to the caller. */ static int __noinline vfs_cache_root_fallback(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; int error; restart: if (mp->mnt_rootvnode != NULL) { MNT_ILOCK(mp); vp = mp->mnt_rootvnode; if (vp != NULL) { if (!VN_IS_DOOMED(vp)) { vrefact(vp); MNT_IUNLOCK(mp); error = vn_lock(vp, flags); if (error == 0) { *vpp = vp; return (0); } vrele(vp); goto restart; } /* * Clear the old one. */ mp->mnt_rootvnode = NULL; } MNT_IUNLOCK(mp); if (vp != NULL) { /* * Paired with a fence in vfs_op_thread_exit(). */ atomic_thread_fence_acq(); vfs_op_barrier_wait(mp); vrele(vp); } } error = VFS_CACHEDROOT(mp, flags, vpp); if (error != 0) return (error); if (mp->mnt_vfs_ops == 0) { MNT_ILOCK(mp); if (mp->mnt_vfs_ops != 0) { MNT_IUNLOCK(mp); return (0); } if (mp->mnt_rootvnode == NULL) { vrefact(*vpp); mp->mnt_rootvnode = *vpp; } else { if (mp->mnt_rootvnode != *vpp) { if (!VN_IS_DOOMED(mp->mnt_rootvnode)) { panic("%s: mismatch between vnode returned " " by VFS_CACHEDROOT and the one cached " " (%p != %p)", __func__, *vpp, mp->mnt_rootvnode); } } } MNT_IUNLOCK(mp); } return (0); } int vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; int error; if (!vfs_op_thread_enter(mp)) return (vfs_cache_root_fallback(mp, flags, vpp)); vp = (struct vnode *)atomic_load_ptr(&mp->mnt_rootvnode); if (vp == NULL || VN_IS_DOOMED(vp)) { vfs_op_thread_exit(mp); return (vfs_cache_root_fallback(mp, flags, vpp)); } vrefact(vp); vfs_op_thread_exit(mp); error = vn_lock(vp, flags); if (error != 0) { vrele(vp); return (vfs_cache_root_fallback(mp, flags, vpp)); } *vpp = vp; return (0); } struct vnode * vfs_cache_root_clear(struct mount *mp) { struct vnode *vp; /* * ops > 0 guarantees there is nobody who can see this vnode */ MPASS(mp->mnt_vfs_ops > 0); vp = mp->mnt_rootvnode; mp->mnt_rootvnode = NULL; return (vp); } void vfs_cache_root_set(struct mount *mp, struct vnode *vp) { MPASS(mp->mnt_vfs_ops > 0); vrefact(vp); mp->mnt_rootvnode = vp; } /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. * * This interface replaces MNT_VNODE_FOREACH. */ struct vnode * __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; if (should_yield()) kern_yield(PRI_USER); MNT_ILOCK(mp); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); for (vp = TAILQ_NEXT(*mvp, v_nmntvnodes); vp != NULL; vp = TAILQ_NEXT(vp, v_nmntvnodes)) { /* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. */ if (vp->v_type == VMARKER || VN_IS_DOOMED(vp)) continue; VI_LOCK(vp); if (VN_IS_DOOMED(vp)) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { __mnt_vnode_markerfree_all(mvp, mp); /* MNT_IUNLOCK(mp); -- done in above function */ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); return (NULL); } TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } struct vnode * __mnt_vnode_first_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = vn_alloc_marker(mp); MNT_ILOCK(mp); MNT_REF(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { /* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. 
*/ if (vp->v_type == VMARKER || VN_IS_DOOMED(vp)) continue; VI_LOCK(vp); if (VN_IS_DOOMED(vp)) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) { MNT_IUNLOCK(mp); return; } mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; } /* * These are helper functions for filesystems to traverse their * lazy vnodes. See MNT_VNODE_FOREACH_LAZY() in sys/mount.h */ static void mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp) { KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; } /* * Relock the mp mount vnode list lock with the vp vnode interlock in the * conventional lock order during mnt_vnode_next_lazy iteration. * * On entry, the mount vnode list lock is held and the vnode interlock is not. * The list lock is dropped and reacquired. On success, both locks are held. * On failure, the mount vnode list lock is held but the vnode interlock is * not, and the procedure may have yielded. */ static bool mnt_vnode_next_lazy_relock(struct vnode *mvp, struct mount *mp, struct vnode *vp) { const struct vnode *tmp; bool held, ret; VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER && TAILQ_NEXT(mvp, v_lazylist) != NULL, mvp, ("%s: bad marker", __func__)); VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp, ("%s: inappropriate vnode", __func__)); ASSERT_VI_UNLOCKED(vp, __func__); mtx_assert(&mp->mnt_listmtx, MA_OWNED); ret = false; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, mvp, v_lazylist); TAILQ_INSERT_BEFORE(vp, mvp, v_lazylist); /* * Use a hold to prevent vp from disappearing while the mount vnode * list lock is dropped and reacquired. Normally a hold would be * acquired with vhold(), but that might try to acquire the vnode * interlock, which would be a LOR with the mount vnode list lock. */ held = refcount_acquire_if_not_zero(&vp->v_holdcnt); mtx_unlock(&mp->mnt_listmtx); if (!held) goto abort; VI_LOCK(vp); if (!refcount_release_if_not_last(&vp->v_holdcnt)) { vdropl(vp); goto abort; } mtx_lock(&mp->mnt_listmtx); /* * Determine whether the vnode is still the next one after the marker, * excepting any other markers. If the vnode has not been doomed by * vgone() then the hold should have ensured that it remained on the * lazy list. If it has been doomed but is still on the lazy list, * don't abort, but rather skip over it (avoid spinning on doomed * vnodes). 
*/ tmp = mvp; do { tmp = TAILQ_NEXT(tmp, v_lazylist); } while (tmp != NULL && tmp->v_type == VMARKER); if (tmp != vp) { mtx_unlock(&mp->mnt_listmtx); VI_UNLOCK(vp); goto abort; } ret = true; goto out; abort: maybe_yield(); mtx_lock(&mp->mnt_listmtx); out: if (ret) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); mtx_assert(&mp->mnt_listmtx, MA_OWNED); return (ret); } static struct vnode * mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { struct vnode *vp, *nvp; mtx_assert(&mp->mnt_listmtx, MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); restart: vp = TAILQ_NEXT(*mvp, v_lazylist); while (vp != NULL) { if (vp->v_type == VMARKER) { vp = TAILQ_NEXT(vp, v_lazylist); continue; } /* * See if we want to process the vnode. Note we may encounter a * long string of vnodes we don't care about and hog the list * as a result. Check for it and requeue the marker. */ if (VN_IS_DOOMED(vp) || !cb(vp, cbarg)) { if (!should_yield()) { vp = TAILQ_NEXT(vp, v_lazylist); continue; } TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); kern_yield(PRI_USER); mtx_lock(&mp->mnt_listmtx); goto restart; } /* * Try-lock because this is the wrong lock order. If that does * not succeed, drop the mount vnode list lock and try to * reacquire it and the vnode interlock in the right order. */ if (!VI_TRYLOCK(vp) && !mnt_vnode_next_lazy_relock(*mvp, mp, vp)) goto restart; KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp)); KASSERT(vp->v_mount == mp || vp->v_mount == NULL, ("alien vnode on the lazy list %p %p", vp, mp)); if (vp->v_mount == mp && !VN_IS_DOOMED(vp)) break; nvp = TAILQ_NEXT(vp, v_lazylist); VI_UNLOCK(vp); vp = nvp; } TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); /* Check if we are done */ if (vp == NULL) { mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); ASSERT_VI_LOCKED(vp, "lazy iter"); return (vp); } struct vnode * __mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { if (should_yield()) kern_yield(PRI_USER); mtx_lock(&mp->mnt_listmtx); return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg)); } struct vnode * __mnt_vnode_first_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { struct vnode *vp; + + if (TAILQ_EMPTY(&mp->mnt_lazyvnodelist)) + return (NULL); *mvp = vn_alloc_marker(mp); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); mtx_lock(&mp->mnt_listmtx); vp = TAILQ_FIRST(&mp->mnt_lazyvnodelist); if (vp == NULL) { mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); return (NULL); } TAILQ_INSERT_BEFORE(vp, *mvp, v_lazylist); return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg)); } void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) return; mtx_lock(&mp->mnt_listmtx); TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); } Index: projects/clang1000-import/sys/netinet/ip_divert.c =================================================================== --- projects/clang1000-import/sys/netinet/ip_divert.c (revision 357178) +++ projects/clang1000-import/sys/netinet/ip_divert.c (revision 357179) @@ -1,835 +1,837 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of 
the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_sctp.h" #ifndef INET #error "IPDIVERT requires INET" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef SCTP #include #endif #include /* * Divert sockets */ /* * Allocate enough space to hold a full IP packet */ #define DIVSNDQ (65536 + 100) #define DIVRCVQ (65536 + 100) /* * Divert sockets work in conjunction with ipfw or other packet filters, * see the divert(4) manpage for features. * Packets are selected by the packet filter and tagged with an * MTAG_IPFW_RULE tag carrying the 'divert port' number (as set by * the packet filter) and information on the matching filter rule for * subsequent reinjection. The divert_port is used to put the packet * on the corresponding divert socket, while the rule number is passed * up (at least partially) as the sin_port in the struct sockaddr. * * Packets written to the divert socket carry in sin_addr a * destination address, and in sin_port the number of the filter rule * after which to continue processing. * If the destination address is INADDR_ANY, the packet is treated as * outgoing and sent to ip_output(); otherwise it is treated as * incoming and sent to ip_input(). * Further, sin_zero carries some information on the interface, * which can be used in the reinject -- see comments in the code. * * On reinjection, processing in ip_input() and ip_output() * will be exactly the same as for the original packet, except that * packet filter processing will start at the rule number after the one * written in the sin_port (ipfw does not allow a rule #0, so sin_port=0 * will apply the entire ruleset to the packet). */ /* Internal variables.
*/ VNET_DEFINE_STATIC(struct inpcbhead, divcb); VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo); #define V_divcb VNET(divcb) #define V_divcbinfo VNET(divcbinfo) static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ static eventhandler_tag ip_divert_event_tag; static int div_output_inbound(int family, struct socket *so, struct mbuf *m, struct sockaddr_in *sin); static int div_output_outbound(int family, struct socket *so, struct mbuf *m); /* * Initialize divert connection block queue. */ static void div_zone_change(void *tag) { uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); } static int div_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "divinp"); return (0); } static void div_init(void) { /* * XXX We don't use the hash list for divert IP, but it's easier to * allocate one-entry hash lists than it is to check all over the * place for hashbase == NULL. */ in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb", div_inpcb_init, IPI_HASHFIELDS_NONE); } static void div_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_divcbinfo); } VNET_SYSUNINIT(divert, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, div_destroy, NULL); /* * IPPROTO_DIVERT is not in the real IP protocol number space; this * function should never be called. Just in case, drop any packets. */ static int div_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; KMOD_IPSTAT_INC(ips_noproto); m_freem(m); return (IPPROTO_DONE); } /* * Divert a packet by passing it up to the divert socket at port 'port'. * * Set up generic address and protocol structures for the div_input routine, * then pass them along with the mbuf chain. */ static void divert_packet(struct mbuf *m, bool incoming) { struct ip *ip; struct inpcb *inp; struct socket *sa; u_int16_t nport; struct sockaddr_in divsrc; struct m_tag *mtag; NET_EPOCH_ASSERT(); mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); if (mtag == NULL) { m_freem(m); return; } /* Assure header */ if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) return; ip = mtod(m, struct ip *); /* Delayed checksums are currently not compatible with divert. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) { sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif bzero(&divsrc, sizeof(divsrc)); divsrc.sin_len = sizeof(divsrc); divsrc.sin_family = AF_INET; /* record matching rule, in host format */ divsrc.sin_port = ((struct ipfw_rule_ref *)(mtag+1))->rulenum; /* * Record receive interface address, if any. * But only for incoming packets. */ if (incoming) { struct ifaddr *ifa; struct ifnet *ifp; /* Sanity check */ M_ASSERTPKTHDR(m); /* Find IP address for receive interface */ ifp = m->m_pkthdr.rcvif; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; divsrc.sin_addr = ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr; break; } } /* * Record the incoming interface name whenever we have one. */ if (m->m_pkthdr.rcvif) { /* * Hide the actual interface name in there in the * sin_zero array. XXX This needs to be moved to a * different sockaddr type for divert, e.g. * sockaddr_div with multiple fields like * sockaddr_dl. Presently we have only 7 bytes * but that will do for now as most interfaces * are 4 or less + 2 or less bytes for unit.
* There is probably a faster way of doing this, * possibly taking it from the sockaddr_dl on the iface. * This solves the problem of a P2P link and a LAN interface * having the same address, which can result in the wrong * interface being assigned to the packet when fed back * into the divert socket. Theoretically if the daemon saves * and re-uses the sockaddr_in as suggested in the man pages, * this iface name will come along for the ride. * (see div_output for the other half of this.) */ strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, sizeof(divsrc.sin_zero)); } /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); CK_LIST_FOREACH(inp, &V_divcb, inp_list) { /* XXX why does only one socket match? */ if (inp->inp_lport == nport) { INP_RLOCK(inp); sa = inp->inp_socket; SOCKBUF_LOCK(&sa->so_rcv); if (sbappendaddr_locked(&sa->so_rcv, (struct sockaddr *)&divsrc, m, (struct mbuf *)0) == 0) { SOCKBUF_UNLOCK(&sa->so_rcv); sa = NULL; /* force mbuf reclaim below */ } else sorwakeup_locked(sa); INP_RUNLOCK(inp); break; } } if (sa == NULL) { m_freem(m); KMOD_IPSTAT_INC(ips_noproto); KMOD_IPSTAT_DEC(ips_delivered); } } /* * Deliver packet back into the IP processing machinery. * * If no address specified, or address is 0.0.0.0, send to ip_output(); * otherwise, send to ip_input() and mark as having been received on * the interface with that address. */ static int div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { struct epoch_tracker et; const struct ip *ip; struct m_tag *mtag; struct ipfw_rule_ref *dt; int error, family; /* * An mbuf may not have come from userland, but we pretend * that it has. */ m->m_pkthdr.rcvif = NULL; m->m_nextpkt = NULL; M_SETFIB(m, so->so_fibnum); if (control) m_freem(control); /* XXX */ mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); if (mtag == NULL) { /* this should be normal */ mtag = m_tag_alloc(MTAG_IPFW_RULE, 0, sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (mtag == NULL) { m_freem(m); return (ENOBUFS); } m_tag_prepend(m, mtag); } dt = (struct ipfw_rule_ref *)(mtag+1); /* Loopback avoidance and state recovery */ if (sin) { int i; /* Set the starting point. We provide a non-zero slot, * but a non-matching chain_id to skip that info and use * the rulenum/rule_id. */ dt->slot = 1; /* dummy, chain_id is invalid */ dt->chain_id = 0; dt->rulenum = sin->sin_port+1; /* host format ? */ dt->rule_id = 0; /* XXX: broken for IPv6 */ /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. * The name is user-supplied data so don't trust its size * or that it is zero terminated. */ for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++) ; if (i > 0 && i < sizeof(sin->sin_zero)) m->m_pkthdr.rcvif = ifunit(sin->sin_zero); } ip = mtod(m, struct ip *); switch (ip->ip_v) { case IPVERSION: family = AF_INET; break; +#ifdef INET6 case IPV6_VERSION >> 4: family = AF_INET6; break; +#endif default: m_freem(m); return (EAFNOSUPPORT); } /* Reinject packet into the system as incoming or outgoing */ NET_EPOCH_ENTER(et); if (!sin || sin->sin_addr.s_addr == 0) { dt->info |= IPFW_IS_DIVERT | IPFW_INFO_OUT; error = div_output_outbound(family, so, m); } else { dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN; error = div_output_inbound(family, so, m, sin); } NET_EPOCH_EXIT(et); if (error != 0) m_freem(m); return (error); } /* * Sends mbuf @m to the wire via ip[6]_output(). * * Returns 0 on success, @m is consumed.
* On failure, returns an error code. It is the caller's responsibility to free @m. */ static int div_output_outbound(int family, struct socket *so, struct mbuf *m) { struct ip *const ip = mtod(m, struct ip *); struct mbuf *options; struct inpcb *inp; int error; inp = sotoinpcb(so); INP_RLOCK(inp); switch (family) { case AF_INET: /* * Don't allow both user-specified and setsockopt * options, and don't allow packet length sizes that * will crash. */ if ((((ip->ip_hl << 2) != sizeof(struct ip)) && inp->inp_options != NULL) || ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) { INP_RUNLOCK(inp); return (EINVAL); } break; #ifdef INET6 case AF_INET6: { struct ip6_hdr *const ip6 = mtod(m, struct ip6_hdr *); /* Don't allow packet length sizes that will crash */ if (((u_short)ntohs(ip6->ip6_plen) > m->m_pkthdr.len)) { INP_RUNLOCK(inp); return (EINVAL); } break; } #endif } /* Send packet to output processing */ KMOD_IPSTAT_INC(ips_rawout); /* XXX */ #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif /* * Get ready to inject the packet into ip_output(). * Just in case socket options were specified on the * divert socket, we duplicate them. This is done * to avoid having to hold the PCB locks over the call * to ip_output(), as doing this results in a number of * lock ordering complexities. * * Note that we set the multicast options argument for * ip_output() to NULL since it should be invariant that * they are not present. */ KASSERT(inp->inp_moptions == NULL, ("multicast options set on a divert socket")); /* * XXXCSJP: It is unclear to me whether or not it makes * sense for divert sockets to have options. However, * for now we will duplicate them with the INP locks * held so we can use them in ip_output() without * requiring a reference to the pcb. */ options = NULL; if (inp->inp_options != NULL) { options = m_dup(inp->inp_options, M_NOWAIT); if (options == NULL) { INP_RUNLOCK(inp); return (ENOBUFS); } } INP_RUNLOCK(inp); error = 0; switch (family) { case AF_INET: error = ip_output(m, options, NULL, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL); break; #ifdef INET6 case AF_INET6: error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); break; #endif } if (options != NULL) m_freem(options); return (error); } /* * Schedules mbuf @m for local processing via the IPv4/IPv6 netisr queue. * * Returns 0 on success, @m is consumed. * Returns an error code on failure. It is the caller's responsibility to free @m. */ static int div_output_inbound(int family, struct socket *so, struct mbuf *m, struct sockaddr_in *sin) { const struct ip *ip; struct ifaddr *ifa; if (m->m_pkthdr.rcvif == NULL) { /* * No luck with the name, check by IP address. * Clear the port and the ifname to make sure * there are no distractions for ifa_ifwithaddr. */ /* XXX: broken for IPv6 */ bzero(sin->sin_zero, sizeof(sin->sin_zero)); sin->sin_port = 0; ifa = ifa_ifwithaddr((struct sockaddr *) sin); if (ifa == NULL) return (EADDRNOTAVAIL); m->m_pkthdr.rcvif = ifa->ifa_ifp; } #ifdef MAC mac_socket_create_mbuf(so, m); #endif /* Send packet to input processing via netisr */ switch (family) { case AF_INET: ip = mtod(m, struct ip *); /* * Restore M_BCAST flag when destination address is * broadcast. It is expected by ip_tryforward().
*/ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) m->m_flags |= M_MCAST; else if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) m->m_flags |= M_BCAST; netisr_queue_src(NETISR_IP, (uintptr_t)so, m); break; #ifdef INET6 case AF_INET6: netisr_queue_src(NETISR_IPV6, (uintptr_t)so, m); break; #endif default: return (EINVAL); } return (0); } static int div_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("div_attach: inp != NULL")); if (td != NULL) { error = priv_check(td, PRIV_NETINET_DIVERT); if (error) return (error); } error = soreserve(so, div_sendspace, div_recvspace); if (error) return error; INP_INFO_WLOCK(&V_divcbinfo); error = in_pcballoc(so, &V_divcbinfo); if (error) { INP_INFO_WUNLOCK(&V_divcbinfo); return error; } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_divcbinfo); inp->inp_ip_p = proto; inp->inp_vflag |= INP_IPV4; inp->inp_flags |= INP_HDRINCL; INP_WUNLOCK(inp); return 0; } static void div_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_detach: inp == NULL")); INP_INFO_WLOCK(&V_divcbinfo); INP_WLOCK(inp); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_divcbinfo); } static int div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_bind: inp == NULL")); /* in_pcbbind assumes that nam is a sockaddr_in * and in_pcbbind requires a valid address. Since divert * sockets don't we need to make sure the address is * filled in properly. * XXX -- divert should not be abusing in_pcbind * and should probably have its own family. */ if (nam->sa_family != AF_INET) return EAFNOSUPPORT; ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; INP_INFO_WLOCK(&V_divcbinfo); INP_WLOCK(inp); INP_HASH_WLOCK(&V_divcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(&V_divcbinfo); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_divcbinfo); return error; } static int div_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return 0; } static int div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { /* Packet must have a header (but that's about it) */ if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { KMOD_IPSTAT_INC(ips_toosmall); m_freem(m); return EINVAL; } /* Send packet */ return div_output(so, m, (struct sockaddr_in *)nam, control); } static void div_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct in_addr faddr; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (PRC_IS_REDIRECT(cmd)) return; } static int div_pcblist(SYSCTL_HANDLER_ARGS) { struct xinpgen xig; struct epoch_tracker et; struct inpcb *inp; int error; if (req->newptr != 0) return EPERM; if (req->oldptr == 0) { int n; n = V_divcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return 0; } if ((error = sysctl_wire_old_buffer(req, 0)) != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = V_divcbinfo.ipi_count; xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; NET_EPOCH_ENTER(et); for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead); inp 
!= NULL; inp = CK_LIST_NEXT(inp, inp_list)) { INP_RLOCK(inp); if (inp->inp_gencnt <= xig.xig_gen) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } NET_EPOCH_EXIT(et); if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_divcbinfo.ipi_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } return (error); } #ifdef SYSCTL_NODE static SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT"); SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, div_pcblist, "S,xinpcb", "List of active divert sockets"); #endif struct pr_usrreqs div_usrreqs = { .pru_attach = div_attach, .pru_bind = div_bind, .pru_control = in_control, .pru_detach = div_detach, .pru_peeraddr = in_getpeeraddr, .pru_send = div_send, .pru_shutdown = div_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel }; struct protosw div_protosw = { .pr_type = SOCK_RAW, .pr_protocol = IPPROTO_DIVERT, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = div_input, .pr_ctlinput = div_ctlinput, .pr_ctloutput = ip_ctloutput, .pr_init = div_init, .pr_usrreqs = &div_usrreqs }; static int div_modevent(module_t mod, int type, void *unused) { int err = 0; switch (type) { case MOD_LOAD: /* * Protocol will be initialized by pf_proto_register(). * We don't have to register ip_protox because we are not * a true IP protocol that goes over the wire. */ err = pf_proto_register(PF_INET, &div_protosw); if (err != 0) return (err); ip_divert_ptr = divert_packet; ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_QUIESCE: /* * IPDIVERT may normally not be unloaded because of the * potential race conditions. Tell kldunload we can't be * unloaded unless the unload is forced. */ err = EPERM; break; case MOD_UNLOAD: /* * Forced unload. * * Module ipdivert can only be unloaded if no sockets are * connected. Maybe this can be changed later to forcefully * disconnect any open sockets. * * XXXRW: Note that there is a slight race here, as a new * socket open request could be spinning on the lock and then * we destroy the lock. 
*/ INP_INFO_WLOCK(&V_divcbinfo); if (V_divcbinfo.ipi_count != 0) { err = EBUSY; INP_INFO_WUNLOCK(&V_divcbinfo); break; } ip_divert_ptr = NULL; err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW); INP_INFO_WUNLOCK(&V_divcbinfo); #ifndef VIMAGE div_destroy(NULL); #endif EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag); break; default: err = EOPNOTSUPP; break; } return err; } static moduledata_t ipdivertmod = { "ipdivert", div_modevent, 0 }; DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_DEPEND(ipdivert, ipfw, 3, 3, 3); MODULE_VERSION(ipdivert, 1); Index: projects/clang1000-import/sys/sys/tree.h =================================================================== --- projects/clang1000-import/sys/sys/tree.h (revision 357178) +++ projects/clang1000-import/sys/sys/tree.h (revision 357179) @@ -1,825 +1,813 @@ /* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */ /* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2002 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _SYS_TREE_H_ #define _SYS_TREE_H_ #include /* * This file defines data structures for different types of trees: * splay trees and red-black trees. * * A splay tree is a self-organizing data structure. Every operation * on the tree causes a splay to happen. The splay moves the requested * node to the root of the tree and partly rebalances it. * * This has the benefit that request locality causes faster lookups as * the requested nodes move to the top of the tree. On the other hand, * every lookup causes memory writes. * * The Balance Theorem bounds the total access time for m operations * and n inserts on an initially empty tree as O((m + n)lg n). The * amortized cost for a sequence of m accesses to a splay tree is O(lg n); * * A red-black tree is a binary search tree with the node color as an * extra attribute. It fulfills a set of conditions: * - every search path from the root to a leaf consists of the * same number of black nodes, * - each red node (except for the root) has a black parent, * - each leaf node is black. * * Every operation on a red-black tree is bounded as O(lg n). * The maximum height of a red-black tree is 2lg (n+1). 
*/ #define SPLAY_HEAD(name, type) \ struct name { \ struct type *sph_root; /* root of the tree */ \ } #define SPLAY_INITIALIZER(root) \ { NULL } #define SPLAY_INIT(root) do { \ (root)->sph_root = NULL; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ENTRY(type) \ struct { \ struct type *spe_left; /* left element */ \ struct type *spe_right; /* right element */ \ } #define SPLAY_LEFT(elm, field) (elm)->field.spe_left #define SPLAY_RIGHT(elm, field) (elm)->field.spe_right #define SPLAY_ROOT(head) (head)->sph_root #define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) /* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ #define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKLEFT(head, tmp, field) do { \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKRIGHT(head, tmp, field) do { \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ } while (/*CONSTCOND*/ 0) /* Generates prototypes and inline functions */ #define SPLAY_PROTOTYPE(name, type, field, cmp) \ void name##_SPLAY(struct name *, struct type *); \ void name##_SPLAY_MINMAX(struct name *, int); \ struct type *name##_SPLAY_INSERT(struct name *, struct type *); \ struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ \ /* Finds the node with the same key as elm */ \ static __unused __inline struct type * \ name##_SPLAY_FIND(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) \ return(NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) \ return (head->sph_root); \ return (NULL); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_NEXT(struct name *head, struct type *elm) \ { \ name##_SPLAY(head, elm); \ if (SPLAY_RIGHT(elm, field) != NULL) { \ elm = SPLAY_RIGHT(elm, field); \ while (SPLAY_LEFT(elm, field) != NULL) { \ elm = SPLAY_LEFT(elm, field); \ } \ } else \ elm = NULL; \ return (elm); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_MIN_MAX(struct name *head, int val) \ { \ name##_SPLAY_MINMAX(head, val); \ return (SPLAY_ROOT(head)); \ } /* Main splay operation. 
* Moves node close to the key of elm to top */ #define SPLAY_GENERATE(name, type, field, cmp) \ struct type * \ name##_SPLAY_INSERT(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) { \ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ } else { \ int __comp; \ name##_SPLAY(head, elm); \ __comp = (cmp)(elm, (head)->sph_root); \ if(__comp < 0) { \ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ SPLAY_RIGHT(elm, field) = (head)->sph_root; \ SPLAY_LEFT((head)->sph_root, field) = NULL; \ } else if (__comp > 0) { \ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT(elm, field) = (head)->sph_root; \ SPLAY_RIGHT((head)->sph_root, field) = NULL; \ } else \ return ((head)->sph_root); \ } \ (head)->sph_root = (elm); \ return (NULL); \ } \ \ struct type * \ name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ { \ struct type *__tmp; \ if (SPLAY_EMPTY(head)) \ return (NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) { \ if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ } else { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ name##_SPLAY(head, elm); \ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ } \ return (elm); \ } \ return (NULL); \ } \ \ void \ name##_SPLAY(struct name *head, struct type *elm) \ { \ struct type __node, *__left, *__right, *__tmp; \ int __comp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) > 0){ \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } \ \ /* Splay with either the minimum or the maximum element \ * Used to find minimum or maximum element in tree. \ */ \ void name##_SPLAY_MINMAX(struct name *head, int __comp) \ { \ struct type __node, *__left, *__right, *__tmp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while (1) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp > 0) { \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } #define SPLAY_NEGINF -1 #define SPLAY_INF 1 #define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) #define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) #define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) #define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) #define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? 
NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) #define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) #define SPLAY_FOREACH(x, name, head) \ for ((x) = SPLAY_MIN(name, head); \ (x) != NULL; \ (x) = SPLAY_NEXT(name, head, x)) /* Macros that define a red-black tree */ #define RB_HEAD(name, type) \ struct name { \ struct type *rbh_root; /* root of the tree */ \ } #define RB_INITIALIZER(root) \ { NULL } #define RB_INIT(root) do { \ (root)->rbh_root = NULL; \ } while (/*CONSTCOND*/ 0) #define RB_BLACK 0 #define RB_RED 1 #define RB_ENTRY(type) \ struct { \ struct type *rbe_left; /* left element */ \ struct type *rbe_right; /* right element */ \ struct type *rbe_parent; /* parent element */ \ int rbe_color; /* node color */ \ } #define RB_LEFT(elm, field) (elm)->field.rbe_left #define RB_RIGHT(elm, field) (elm)->field.rbe_right #define RB_PARENT(elm, field) (elm)->field.rbe_parent #define RB_COLOR(elm, field) (elm)->field.rbe_color #define RB_ROOT(head) (head)->rbh_root #define RB_EMPTY(head) (RB_ROOT(head) == NULL) #define RB_SET(elm, parent, field) do { \ RB_PARENT(elm, field) = parent; \ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ RB_COLOR(elm, field) = RB_RED; \ } while (/*CONSTCOND*/ 0) #define RB_SET_BLACKRED(black, red, field) do { \ RB_COLOR(black, field) = RB_BLACK; \ RB_COLOR(red, field) = RB_RED; \ } while (/*CONSTCOND*/ 0) +/* + * Something to be invoked in a loop at the root of every modified subtree, + * from the bottom up to the root, to update augmented node data. + */ #ifndef RB_AUGMENT -#define RB_AUGMENT(x) do {} while (0) +#define RB_AUGMENT(x) break #endif #define RB_ROTATE_LEFT(head, elm, tmp, field) do { \ (tmp) = RB_RIGHT(elm, field); \ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) { \ RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \ } \ - RB_AUGMENT(elm); \ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ else \ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ } else \ (head)->rbh_root = (tmp); \ RB_LEFT(tmp, field) = (elm); \ RB_PARENT(elm, field) = (tmp); \ - RB_AUGMENT(tmp); \ - if ((RB_PARENT(tmp, field))) \ - RB_AUGMENT(RB_PARENT(tmp, field)); \ + RB_AUGMENT(elm); \ } while (/*CONSTCOND*/ 0) #define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \ (tmp) = RB_LEFT(elm, field); \ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) { \ RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \ } \ - RB_AUGMENT(elm); \ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ else \ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ } else \ (head)->rbh_root = (tmp); \ RB_RIGHT(tmp, field) = (elm); \ RB_PARENT(elm, field) = (tmp); \ - RB_AUGMENT(tmp); \ - if ((RB_PARENT(tmp, field))) \ - RB_AUGMENT(RB_PARENT(tmp, field)); \ + RB_AUGMENT(elm); \ } while (/*CONSTCOND*/ 0) /* Generates prototypes and inline functions */ #define RB_PROTOTYPE(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp,) #define RB_PROTOTYPE_STATIC(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static) #define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \ RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \ RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \ RB_PROTOTYPE_INSERT(name, type, attr); \ RB_PROTOTYPE_REMOVE(name, type, attr); \ RB_PROTOTYPE_FIND(name, type, attr); \ 
RB_PROTOTYPE_NFIND(name, type, attr); \ RB_PROTOTYPE_NEXT(name, type, attr); \ RB_PROTOTYPE_PREV(name, type, attr); \ RB_PROTOTYPE_MINMAX(name, type, attr); \ RB_PROTOTYPE_REINSERT(name, type, attr); #define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \ attr void name##_RB_INSERT_COLOR(struct name *, struct type *) #define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \ attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *) #define RB_PROTOTYPE_REMOVE(name, type, attr) \ attr struct type *name##_RB_REMOVE(struct name *, struct type *) #define RB_PROTOTYPE_INSERT(name, type, attr) \ attr struct type *name##_RB_INSERT(struct name *, struct type *) #define RB_PROTOTYPE_FIND(name, type, attr) \ attr struct type *name##_RB_FIND(struct name *, struct type *) #define RB_PROTOTYPE_NFIND(name, type, attr) \ attr struct type *name##_RB_NFIND(struct name *, struct type *) #define RB_PROTOTYPE_NEXT(name, type, attr) \ attr struct type *name##_RB_NEXT(struct type *) #define RB_PROTOTYPE_PREV(name, type, attr) \ attr struct type *name##_RB_PREV(struct type *) #define RB_PROTOTYPE_MINMAX(name, type, attr) \ attr struct type *name##_RB_MINMAX(struct name *, int) #define RB_PROTOTYPE_REINSERT(name, type, attr) \ attr struct type *name##_RB_REINSERT(struct name *, struct type *) /* Main rb operation. * Moves node close to the key of elm to top */ #define RB_GENERATE(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp,) #define RB_GENERATE_STATIC(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static) #define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ RB_GENERATE_INSERT(name, type, field, cmp, attr) \ RB_GENERATE_REMOVE(name, type, field, attr) \ RB_GENERATE_FIND(name, type, field, cmp, attr) \ RB_GENERATE_NFIND(name, type, field, cmp, attr) \ RB_GENERATE_NEXT(name, type, field, attr) \ RB_GENERATE_PREV(name, type, field, attr) \ RB_GENERATE_MINMAX(name, type, field, attr) \ RB_GENERATE_REINSERT(name, type, field, cmp, attr) #define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ attr void \ name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ { \ struct type *parent, *gparent, *tmp; \ while ((parent = RB_PARENT(elm, field)) != NULL && \ RB_COLOR(parent, field) == RB_RED) { \ gparent = RB_PARENT(parent, field); \ if (parent == RB_LEFT(gparent, field)) { \ tmp = RB_RIGHT(gparent, field); \ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ RB_COLOR(tmp, field) = RB_BLACK; \ RB_SET_BLACKRED(parent, gparent, field);\ elm = gparent; \ continue; \ } \ if (RB_RIGHT(parent, field) == elm) { \ RB_ROTATE_LEFT(head, parent, tmp, field);\ tmp = parent; \ parent = elm; \ elm = tmp; \ } \ RB_SET_BLACKRED(parent, gparent, field); \ RB_ROTATE_RIGHT(head, gparent, tmp, field); \ } else { \ tmp = RB_LEFT(gparent, field); \ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ RB_COLOR(tmp, field) = RB_BLACK; \ RB_SET_BLACKRED(parent, gparent, field);\ elm = gparent; \ continue; \ } \ if (RB_LEFT(parent, field) == elm) { \ RB_ROTATE_RIGHT(head, parent, tmp, field);\ tmp = parent; \ parent = elm; \ elm = tmp; \ } \ RB_SET_BLACKRED(parent, gparent, field); \ RB_ROTATE_LEFT(head, gparent, tmp, field); \ } \ } \ RB_COLOR(head->rbh_root, field) = RB_BLACK; \ } #define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ attr void \ name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \ { \ struct type *tmp; \ while ((elm 
== NULL || RB_COLOR(elm, field) == RB_BLACK) && \ elm != RB_ROOT(head)) { \ if (RB_LEFT(parent, field) == elm) { \ tmp = RB_RIGHT(parent, field); \ if (RB_COLOR(tmp, field) == RB_RED) { \ RB_SET_BLACKRED(tmp, parent, field); \ RB_ROTATE_LEFT(head, parent, tmp, field);\ tmp = RB_RIGHT(parent, field); \ } \ if ((RB_LEFT(tmp, field) == NULL || \ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ (RB_RIGHT(tmp, field) == NULL || \ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ RB_COLOR(tmp, field) = RB_RED; \ elm = parent; \ parent = RB_PARENT(elm, field); \ } else { \ if (RB_RIGHT(tmp, field) == NULL || \ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\ struct type *oleft; \ if ((oleft = RB_LEFT(tmp, field)) \ != NULL) \ RB_COLOR(oleft, field) = RB_BLACK;\ RB_COLOR(tmp, field) = RB_RED; \ RB_ROTATE_RIGHT(head, tmp, oleft, field);\ tmp = RB_RIGHT(parent, field); \ } \ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ RB_COLOR(parent, field) = RB_BLACK; \ if (RB_RIGHT(tmp, field)) \ RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\ RB_ROTATE_LEFT(head, parent, tmp, field);\ elm = RB_ROOT(head); \ break; \ } \ } else { \ tmp = RB_LEFT(parent, field); \ if (RB_COLOR(tmp, field) == RB_RED) { \ RB_SET_BLACKRED(tmp, parent, field); \ RB_ROTATE_RIGHT(head, parent, tmp, field);\ tmp = RB_LEFT(parent, field); \ } \ if ((RB_LEFT(tmp, field) == NULL || \ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ (RB_RIGHT(tmp, field) == NULL || \ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ RB_COLOR(tmp, field) = RB_RED; \ elm = parent; \ parent = RB_PARENT(elm, field); \ } else { \ if (RB_LEFT(tmp, field) == NULL || \ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\ struct type *oright; \ if ((oright = RB_RIGHT(tmp, field)) \ != NULL) \ RB_COLOR(oright, field) = RB_BLACK;\ RB_COLOR(tmp, field) = RB_RED; \ RB_ROTATE_LEFT(head, tmp, oright, field);\ tmp = RB_LEFT(parent, field); \ } \ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ RB_COLOR(parent, field) = RB_BLACK; \ if (RB_LEFT(tmp, field)) \ RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\ RB_ROTATE_RIGHT(head, parent, tmp, field);\ elm = RB_ROOT(head); \ break; \ } \ } \ } \ if (elm) \ RB_COLOR(elm, field) = RB_BLACK; \ } #define RB_GENERATE_REMOVE(name, type, field, attr) \ attr struct type * \ name##_RB_REMOVE(struct name *head, struct type *elm) \ { \ struct type *child, *parent, *old = elm; \ int color; \ if (RB_LEFT(elm, field) == NULL) \ child = RB_RIGHT(elm, field); \ else if (RB_RIGHT(elm, field) == NULL) \ child = RB_LEFT(elm, field); \ else { \ - struct type *left; \ - elm = RB_RIGHT(elm, field); \ - while ((left = RB_LEFT(elm, field)) != NULL) \ - elm = left; \ - child = RB_RIGHT(elm, field); \ - parent = RB_PARENT(elm, field); \ - color = RB_COLOR(elm, field); \ - if (child) \ - RB_PARENT(child, field) = parent; \ - if (parent) { \ - if (RB_LEFT(parent, field) == elm) \ - RB_LEFT(parent, field) = child; \ + elm = RB_RIGHT(old, field); \ + if ((child = RB_LEFT(elm, field)) == NULL) { \ + child = RB_RIGHT(elm, field); \ + RB_RIGHT(old, field) = child; \ + RB_PARENT(elm, field) = elm; \ + } else { \ + do \ + elm = child; \ + while ((child = RB_LEFT(elm, field)) != NULL); \ + child = RB_RIGHT(elm, field); \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + } \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + parent = RB_PARENT(old, field); \ + if (parent != NULL) { \ + if (RB_LEFT(parent, field) == old) \ + RB_LEFT(parent, field) = elm; \ else \ - RB_RIGHT(parent, field) = child; \ - RB_AUGMENT(parent); \ + 
RB_RIGHT(parent, field) = elm; \ } else \ - RB_ROOT(head) = child; \ - if (RB_PARENT(elm, field) == old) \ - parent = elm; \ - (elm)->field = (old)->field; \ - if (RB_PARENT(old, field)) { \ - if (RB_LEFT(RB_PARENT(old, field), field) == old)\ - RB_LEFT(RB_PARENT(old, field), field) = elm;\ - else \ - RB_RIGHT(RB_PARENT(old, field), field) = elm;\ - RB_AUGMENT(RB_PARENT(old, field)); \ - } else \ RB_ROOT(head) = elm; \ - RB_PARENT(RB_LEFT(old, field), field) = elm; \ - if (RB_RIGHT(old, field)) \ - RB_PARENT(RB_RIGHT(old, field), field) = elm; \ - if (parent) { \ - left = parent; \ - do { \ - RB_AUGMENT(left); \ - } while ((left = RB_PARENT(left, field)) != NULL); \ - } \ - goto color; \ } \ parent = RB_PARENT(elm, field); \ color = RB_COLOR(elm, field); \ - if (child) \ + if (child != NULL) \ RB_PARENT(child, field) = parent; \ - if (parent) { \ + if (parent != NULL) { \ if (RB_LEFT(parent, field) == elm) \ RB_LEFT(parent, field) = child; \ else \ RB_RIGHT(parent, field) = child; \ - RB_AUGMENT(parent); \ } else \ RB_ROOT(head) = child; \ -color: \ + if (elm != old) \ + (elm)->field = (old)->field; \ if (color == RB_BLACK) \ name##_RB_REMOVE_COLOR(head, parent, child); \ + while (parent != NULL) { \ + RB_AUGMENT(parent); \ + parent = RB_PARENT(parent, field); \ + } \ return (old); \ -} \ +} #define RB_GENERATE_INSERT(name, type, field, cmp, attr) \ /* Inserts a node into the RB tree */ \ attr struct type * \ name##_RB_INSERT(struct name *head, struct type *elm) \ { \ struct type *tmp; \ struct type *parent = NULL; \ int comp = 0; \ tmp = RB_ROOT(head); \ while (tmp) { \ parent = tmp; \ comp = (cmp)(elm, parent); \ if (comp < 0) \ tmp = RB_LEFT(tmp, field); \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ RB_SET(elm, parent, field); \ if (parent != NULL) { \ if (comp < 0) \ RB_LEFT(parent, field) = elm; \ else \ RB_RIGHT(parent, field) = elm; \ - RB_AUGMENT(parent); \ } else \ RB_ROOT(head) = elm; \ name##_RB_INSERT_COLOR(head, elm); \ + while (elm != NULL) { \ + RB_AUGMENT(elm); \ + elm = RB_PARENT(elm, field); \ + } \ return (NULL); \ } #define RB_GENERATE_FIND(name, type, field, cmp, attr) \ /* Finds the node with the same key as elm */ \ attr struct type * \ name##_RB_FIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ int comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) \ tmp = RB_LEFT(tmp, field); \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (NULL); \ } #define RB_GENERATE_NFIND(name, type, field, cmp, attr) \ /* Finds the first node greater than or equal to the search key */ \ attr struct type * \ name##_RB_NFIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *res = NULL; \ int comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) { \ res = tmp; \ tmp = RB_LEFT(tmp, field); \ } \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (res); \ } #define RB_GENERATE_NEXT(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_NEXT(struct type *elm) \ { \ if (RB_RIGHT(elm, field)) { \ elm = RB_RIGHT(elm, field); \ while (RB_LEFT(elm, field)) \ elm = RB_LEFT(elm, field); \ } else { \ if (RB_PARENT(elm, field) && \ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ else { \ while (RB_PARENT(elm, field) && \ (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ } \ return (elm); \ } 
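/*
 * Illustrative sketch, not part of this header: how a consumer might
 * use these macros with the reworked RB_AUGMENT hook.  All names below
 * (node, nodetree, node_cmp, node_augment, the "size" field) are
 * hypothetical.  If RB_AUGMENT is defined before RB_GENERATE, it is
 * now invoked bottom-up on each node of every modified subtree, so it
 * can maintain per-subtree data such as node counts:
 *
 *	struct node {
 *		RB_ENTRY(node) link;
 *		int key;
 *		int size;		// nodes in the subtree rooted here
 *	};
 *
 *	static void node_augment(struct node *);
 *	#define RB_AUGMENT(x)	node_augment(x)
 *
 *	static int
 *	node_cmp(struct node *a, struct node *b)
 *	{
 *		return (a->key < b->key ? -1 : a->key > b->key);
 *	}
 *
 *	RB_HEAD(nodetree, node);
 *	RB_GENERATE_STATIC(nodetree, node, link, node_cmp)
 *
 *	static void
 *	node_augment(struct node *n)
 *	{
 *		n->size = 1;
 *		if (RB_LEFT(n, link) != NULL)
 *			n->size += RB_LEFT(n, link)->size;
 *		if (RB_RIGHT(n, link) != NULL)
 *			n->size += RB_RIGHT(n, link)->size;
 *	}
 *
 * After RB_INSERT() or RB_REMOVE(), every node's "size" is up to date,
 * because the generated code walks from each modified subtree up to the
 * root calling RB_AUGMENT() on the way.
 */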
#define RB_GENERATE_PREV(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_PREV(struct type *elm) \ { \ if (RB_LEFT(elm, field)) { \ elm = RB_LEFT(elm, field); \ while (RB_RIGHT(elm, field)) \ elm = RB_RIGHT(elm, field); \ } else { \ if (RB_PARENT(elm, field) && \ (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ else { \ while (RB_PARENT(elm, field) && \ (elm == RB_LEFT(RB_PARENT(elm, field), field)))\ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ } \ return (elm); \ } #define RB_GENERATE_MINMAX(name, type, field, attr) \ attr struct type * \ name##_RB_MINMAX(struct name *head, int val) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *parent = NULL; \ while (tmp) { \ parent = tmp; \ if (val < 0) \ tmp = RB_LEFT(tmp, field); \ else \ tmp = RB_RIGHT(tmp, field); \ } \ return (parent); \ } #define RB_GENERATE_REINSERT(name, type, field, cmp, attr) \ attr struct type * \ name##_RB_REINSERT(struct name *head, struct type *elm) \ { \ struct type *cmpelm; \ if (((cmpelm = RB_PREV(name, head, elm)) != NULL && \ cmp(cmpelm, elm) >= 0) || \ ((cmpelm = RB_NEXT(name, head, elm)) != NULL && \ cmp(elm, cmpelm) >= 0)) { \ /* XXXLAS: Remove/insert is heavy handed. */ \ RB_REMOVE(name, head, elm); \ return (RB_INSERT(name, head, elm)); \ } \ return (NULL); \ } \ #define RB_NEGINF -1 #define RB_INF 1 #define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) #define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) #define RB_FIND(name, x, y) name##_RB_FIND(x, y) #define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) #define RB_NEXT(name, x, y) name##_RB_NEXT(y) #define RB_PREV(name, x, y) name##_RB_PREV(y) #define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) #define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) #define RB_REINSERT(name, x, y) name##_RB_REINSERT(x, y) #define RB_FOREACH(x, name, head) \ for ((x) = RB_MIN(name, head); \ (x) != NULL; \ (x) = name##_RB_NEXT(x)) #define RB_FOREACH_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_SAFE(x, name, head, y) \ for ((x) = RB_MIN(name, head); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE(x, name, head) \ for ((x) = RB_MAX(name, head); \ (x) != NULL; \ (x) = name##_RB_PREV(x)) #define RB_FOREACH_REVERSE_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ for ((x) = RB_MAX(name, head); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #endif /* _SYS_TREE_H_ */ Index: projects/clang1000-import/sys/tools/vnode_if.awk =================================================================== --- projects/clang1000-import/sys/tools/vnode_if.awk (revision 357178) +++ projects/clang1000-import/sys/tools/vnode_if.awk (revision 357179) @@ -1,476 +1,478 @@ #!/usr/bin/awk -f #- # SPDX-License-Identifier: BSD-3-Clause # # Copyright (c) 1992, 1993 # The Regents of the University of California. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93 # $FreeBSD$ # # Script to produce VFS front-end sugar. # # usage: vnode_if.awk [-c | -h | -p | -q] # (where is currently /sys/kern/vnode_if.src) # The source file must have a .src extension # function usage() { print "usage: vnode_if.awk [-c|-h|-p|-q]"; exit 1; } function die(msg, what) { printf srcfile "(" fnr "): " > "/dev/stderr"; printf msg "\n", what > "/dev/stderr"; exit 1; } function t_spc(type) { # Append a space if the type is not a pointer return (type ~ /\*$/) ? type : type " "; } # These are just for convenience ... function printc(s) {print s > cfile;} function printh(s) {print s > hfile;} function printp(s) {print s > pfile;} function printq(s) {print s > qfile;} function add_debug_code(name, arg, pos, ind) { if (arg == "vpp") star = "*"; else star = ""; if (lockdata[name, arg, pos] && (lockdata[name, arg, pos] != "-")) { printc(ind"ASSERT_VI_UNLOCKED("star"a->a_"arg", \""uname"\");"); # Add assertions for locking if (lockdata[name, arg, pos] == "L") printc(ind"ASSERT_VOP_LOCKED(" star "a->a_"arg", \""uname"\");"); else if (lockdata[name, arg, pos] == "U") printc(ind"ASSERT_VOP_UNLOCKED(" star "a->a_"arg", \""uname"\");"); else if (lockdata[name, arg, pos] == "E") printc(ind"ASSERT_VOP_ELOCKED(" star "a->a_"arg", \""uname"\");"); else if (0) { # XXX More checks! } } } function add_pre(name) { if (lockdata[name, "pre"]) { printc("\t"lockdata[name, "pre"]"(a);"); } } function add_post(name) { if (lockdata[name, "post"]) { printc("\t"lockdata[name, "post"]"(a, rc);"); } } function find_arg_with_type (type) { for (jj = 0; jj < numargs; jj++) { if (types[jj] == type) { return "VOPARG_OFFSETOF(struct " \ name "_args,a_" args[jj] ")"; } } return "VDESC_NO_OFFSET"; } BEGIN{ # Process the command line for (i = 1; i < ARGC; i++) { arg = ARGV[i]; if (arg !~ /^-[chpq]+$/ && arg !~ /\.src$/) usage(); if (arg ~ /^-.*c/) cfile = "vnode_if.c"; if (arg ~ /^-.*h/) hfile = "vnode_if.h"; if (arg ~ /^-.*p/) pfile = "vnode_if_newproto.h"; if (arg ~ /^-.*q/) qfile = "vnode_if_typedef.h"; if (arg ~ /\.src$/) srcfile = arg; } ARGC = 1; if (!cfile && !hfile && !pfile && !qfile) exit 0; if (!srcfile) usage(); # Avoid a literal generated file tag here. 
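	# (The string concatenation below keeps the literal marker out of
	# this script's own text, so tools that scan for it do not mistake
	# vnode_if.awk itself for a generated file.)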
generated = "@" "generated"; common_head = \ "/*\n" \ " * This file is " generated " automatically.\n" \ " * Do not modify anything in here by hand.\n" \ " *\n" \ " * Created from $FreeBSD$\n" \ " */\n" \ "\n"; if (pfile) { printp(common_head) printp("struct vop_vector {") printp("\tstruct vop_vector\t*vop_default;") printp("\tvop_bypass_t\t*vop_bypass;") } if (qfile) { printq(common_head) } if (hfile) { printh(common_head "extern struct vnodeop_desc vop_default_desc;"); printh("#include \"vnode_if_typedef.h\"") printh("#include \"vnode_if_newproto.h\"") } if (cfile) { printc(common_head \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "\n" \ "SDT_PROVIDER_DECLARE(vfs);\n" \ "\n" \ "struct vnodeop_desc vop_default_desc = {\n" \ " \"default\",\n" \ " 0,\n" \ " 0,\n" \ " (vop_bypass_t *)vop_panic,\n" \ " NULL,\n" \ " VDESC_NO_OFFSET,\n" \ " VDESC_NO_OFFSET,\n" \ " VDESC_NO_OFFSET,\n" \ " VDESC_NO_OFFSET,\n" \ "};\n"); } while ((getline < srcfile) > 0) { fnr++; if (NF == 0) continue; if ($1 ~ /^%%/) { if (NF != 6 || $2 !~ /^[a-z_]+$/ || $3 !~ /^[a-z]+$/ || $4 !~ /^.$/ || $5 !~ /^.$/ || $6 !~ /^.$/) { die("Invalid %s construction", "%%"); continue; } lockdata["vop_" $2, $3, "Entry"] = $4; lockdata["vop_" $2, $3, "OK"] = $5; lockdata["vop_" $2, $3, "Error"] = $6; continue; } if ($1 ~ /^%!/) { if (NF != 4 || ($3 != "pre" && $3 != "post")) { die("Invalid %s construction", "%!"); continue; } lockdata["vop_" $2, $3] = $4; continue; } if ($1 ~ /^#/) continue; # Get the function name. name = $1; uname = toupper(name); # Get the function arguments. for (numargs = 0; ; ++numargs) { if ((getline < srcfile) <= 0) { die("Unable to read through the arguments for \"%s\"", name); } fnr++; if ($1 ~ /^\};/) break; # Delete comments, if any. gsub (/\/\*.*\*\//, ""); # Condense whitespace and delete leading/trailing space. gsub(/[[:space:]]+/, " "); sub(/^ /, ""); sub(/ $/, ""); # Pick off direction. if ($1 != "INOUT" && $1 != "IN" && $1 != "OUT") die("No IN/OUT direction for \"%s\".", $0); dirs[numargs] = $1; sub(/^[A-Z]* /, ""); if ((reles[numargs] = $1) == "WILLRELE") sub(/^[A-Z]* /, ""); else reles[numargs] = "WONTRELE"; # kill trailing ; if (sub(/;$/, "") < 1) die("Missing end-of-line ; in \"%s\".", $0); # pick off variable name if ((argp = match($0, /[A-Za-z0-9_]+$/)) < 1) die("Missing var name \"a_foo\" in \"%s\".", $0); args[numargs] = substr($0, argp); $0 = substr($0, 1, argp - 1); # what is left must be type # remove trailing space (if any) sub(/ $/, ""); types[numargs] = $0; } if (numargs > 4) ctrargs = 4; else ctrargs = numargs; ctrstr = ctrargs "(KTR_VOP, \"VOP\", \"" uname "\", (uintptr_t)a,\n\t "; ctrstr = ctrstr "\"" args[0] ":0x%jX\", (uintptr_t)a->a_" args[0]; for (i = 1; i < ctrargs; ++i) ctrstr = ctrstr ", \"" args[i] ":0x%jX\", a->a_" args[i]; ctrstr = ctrstr ");"; if (pfile) { printp("\t"name"_t\t*"name";") } if (qfile) { printq("struct "name"_args;") printq("typedef int "name"_t(struct "name"_args *);\n") } if (hfile) { # Print out the vop_F_args structure. printh("struct "name"_args {\n\tstruct vop_generic_args a_gen;"); for (i = 0; i < numargs; ++i) printh("\t" t_spc(types[i]) "a_" args[i] ";"); printh("};"); printh(""); # Print out extern declaration. printh("extern struct vnodeop_desc " name "_desc;"); printh(""); # Print out function prototypes. 
printh("int " uname "_AP(struct " name "_args *);"); printh("int " uname "_APV(struct vop_vector *vop, struct " name "_args *);"); printh(""); printh("static __inline int " uname "("); for (i = 0; i < numargs; ++i) { printh("\t" t_spc(types[i]) args[i] \ (i < numargs - 1 ? "," : ")")); } printh("{"); printh("\tstruct " name "_args a;"); printh(""); printh("\ta.a_gen.a_desc = &" name "_desc;"); for (i = 0; i < numargs; ++i) printh("\ta.a_" args[i] " = " args[i] ";"); printh("\treturn (" uname "_APV("args[0]"->v_op, &a));"); printh("}"); printh(""); } if (cfile) { funcarr[name] = 1; # Print out the vop_F_vp_offsets structure. This all depends # on naming conventions and nothing else. printc("static int " name "_vp_offsets[] = {"); # as a side effect, figure out the releflags releflags = ""; vpnum = 0; for (i = 0; i < numargs; i++) { if (types[i] == "struct vnode *") { printc("\tVOPARG_OFFSETOF(struct " name \ "_args,a_" args[i] "),"); if (reles[i] == "WILLRELE") { releflags = releflags \ "|VDESC_VP" vpnum "_WILLRELE"; } vpnum++; } } sub(/^\|/, "", releflags); printc("\tVDESC_NO_OFFSET"); printc("};"); printc("\n"); printc("SDT_PROBE_DEFINE2(vfs, vop, " name ", entry, \"struct vnode *\", \"struct " name "_args *\");\n"); printc("SDT_PROBE_DEFINE3(vfs, vop, " name ", return, \"struct vnode *\", \"struct " name "_args *\", \"int\");\n"); # Print out function. printc("\nint\n" uname "_AP(struct " name "_args *a)"); printc("{"); printc(""); printc("\treturn(" uname "_APV(a->a_" args[0] "->v_op, a));"); printc("}"); printc("\nint\n" uname "_APV(struct vop_vector *vop, struct " name "_args *a)"); printc("{"); printc("\tint rc;"); printc(""); printc("\tVNASSERT(a->a_gen.a_desc == &" name "_desc, a->a_" args[0]","); printc("\t (\"Wrong a_desc in " name "(%p, %p)\", a->a_" args[0]", a));"); printc("\tVNASSERT(vop != NULL, a->a_" args[0]", (\"No "name"(%p, %p)\", a->a_" args[0]", a));") printc("\tKTR_START" ctrstr); add_pre(name); for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "Entry", "\t"); - printc("\tif (__predict_true(!SDT_PROBES_ENABLED() && vop->"name" != NULL)) {"); + printc("\tif (!SDT_PROBES_ENABLED()) {"); printc("\t\trc = vop->"name"(a);") printc("\t} else {") printc("\t\tSDT_PROBE2(vfs, vop, " name ", entry, a->a_" args[0] ", a);"); - printc("\t\tif (vop->"name" != NULL)") - printc("\t\t\trc = vop->"name"(a);") - printc("\t\telse") - printc("\t\t\trc = vop->vop_bypass(&a->a_gen);") + printc("\t\trc = vop->"name"(a);") printc("\t\tSDT_PROBE3(vfs, vop, " name ", return, a->a_" args[0] ", a, rc);"); printc("\t}") printc("\tif (rc == 0) {"); for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "OK", "\t\t"); printc("\t} else {"); for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "Error", "\t\t"); printc("\t}"); add_post(name); printc("\tKTR_STOP" ctrstr); printc("\treturn (rc);"); printc("}\n"); # Print out the vnodeop_desc structure. 
printc("struct vnodeop_desc " name "_desc = {"); # printable name printc("\t\"" name "\","); # flags vppwillrele = ""; for (i = 0; i < numargs; i++) { if (types[i] == "struct vnode **" && \ reles[i] == "WILLRELE") { vppwillrele = "|VDESC_VPP_WILLRELE"; } } if (!releflags) releflags = "0"; printc("\t" releflags vppwillrele ","); # index in struct vop_vector printc("\t__offsetof(struct vop_vector, " name "),"); # function to call printc("\t(vop_bypass_t *)" uname "_AP,"); # vp offsets printc("\t" name "_vp_offsets,"); # vpp (if any) printc("\t" find_arg_with_type("struct vnode **") ","); # cred (if any) printc("\t" find_arg_with_type("struct ucred *") ","); # thread (if any) printc("\t" find_arg_with_type("struct thread *") ","); # componentname printc("\t" find_arg_with_type("struct componentname *") ","); # transport layer information printc("};\n"); } } if (cfile) { printc("void"); printc("vfs_vector_op_register(struct vop_vector *orig_vop)"); printc("{"); printc("\tstruct vop_vector *vop;"); printc(""); printc("\tif (orig_vop->registered)"); printc("\t\tpanic(\"%s: vop_vector %p already registered\",") printc("\t\t __func__, orig_vop);"); printc(""); for (name in funcarr) { printc("\tvop = orig_vop;"); printc("\twhile (vop != NULL && \\"); printc("\t vop->"name" == NULL && vop->vop_bypass == NULL)") printc("\t\tvop = vop->vop_default;") printc("\tif (vop != NULL)"); printc("\t\torig_vop->"name" = vop->"name";"); printc(""); } printc("\tvop = orig_vop;"); printc("\twhile (vop != NULL && vop->vop_bypass == NULL)") printc("\t\tvop = vop->vop_default;") printc("\tif (vop != NULL)"); printc("\t\torig_vop->vop_bypass = vop->vop_bypass;"); + printc(""); + for (name in funcarr) { + printc("\tif (orig_vop->"name" == NULL)"); + printc("\t\torig_vop->"name" = (void *)orig_vop->vop_bypass;"); + } printc(""); printc("\torig_vop->registered = true;"); printc("}") } if (hfile) { printh("void vfs_vector_op_register(struct vop_vector *orig_vop);"); } if (pfile) { printp("\tbool\tregistered;") printp("};") } if (hfile) close(hfile); if (cfile) close(cfile); if (pfile) close(pfile); close(srcfile); exit 0; } Index: projects/clang1000-import/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- projects/clang1000-import/sys/ufs/ffs/ffs_vfsops.c (revision 357178) +++ projects/clang1000-import/sys/ufs/ffs/ffs_vfsops.c (revision 357179) @@ -1,2398 +1,2402 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_ufs.h" #include "opt_ffs.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static int ffs_sync_lazy(struct mount *mp); static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size); static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; static vfs_extattrctl_t ffs_extattrctl; static vfs_cmount_t ffs_cmount; static vfs_unmount_t ffs_unmount; static vfs_mount_t ffs_mount; static vfs_statfs_t ffs_statfs; static vfs_fhtovp_t ffs_fhtovp; static vfs_sync_t ffs_sync; static struct vfsops ufs_vfsops = { .vfs_extattrctl = ffs_extattrctl, .vfs_fhtovp = ffs_fhtovp, .vfs_init = ffs_init, .vfs_mount = ffs_mount, .vfs_cmount = ffs_cmount, .vfs_quotactl = ufs_quotactl, .vfs_root = vfs_cache_root, .vfs_cachedroot = ufs_root, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit, .vfs_unmount = ffs_unmount, .vfs_vget = ffs_vget, .vfs_susp_clean = process_deferred_inactive, }; VFS_SET(ufs_vfsops, ufs, 0); MODULE_VERSION(ufs, 1); static b_strategy_t ffs_geom_strategy; static b_write_t ffs_bufwrite; static struct buf_ops ffs_ops = { .bop_name = "FFS", .bop_write = ffs_bufwrite, .bop_strategy = ffs_geom_strategy, .bop_sync = bufsync, #ifdef NO_FFS_SNAPSHOT .bop_bdflush = bufbdflush, #else .bop_bdflush = ffs_bdflush, #endif }; /* * Note that userquota and groupquota options are not currently used * by UFS/FFS code and generally mount(8) does not pass those options * from userland, but they can be passed by loader(8) via * vfs.root.mountfrom.options. 
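 *
 * For example, a hypothetical /boot/loader.conf entry such as
 *
 *	vfs.root.mountfrom.options="rw,userquota"
 *
 * reaches ffs_mount() through that path, which is why both options are
 * kept in ffs_opts[] and then explicitly deleted again below.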
*/ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "groupquota", "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", "union", "userquota", "untrusted", NULL }; static int ffs_mount(struct mount *mp) { struct vnode *devvp; struct thread *td; struct ufsmount *ump = NULL; struct fs *fs; pid_t fsckpid = 0; int error, error1, flags; uint64_t mntorflags, saved_mnt_flag; accmode_t accmode; struct nameidata ndp; char *fspec; td = curthread; if (vfs_filteropt(mp->mnt_optnew, ffs_opts)) return (EINVAL); if (uma_inode == NULL) { uma_inode = uma_zcreate("FFS inode", sizeof(struct inode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs1 = uma_zcreate("FFS1 dinode", sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs2 = uma_zcreate("FFS2 dinode", sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } vfs_deleteopt(mp->mnt_optnew, "groupquota"); vfs_deleteopt(mp->mnt_optnew, "userquota"); fspec = vfs_getopts(mp->mnt_optnew, "from", &error); if (error) return (error); mntorflags = 0; if (vfs_getopt(mp->mnt_optnew, "untrusted", NULL, NULL) == 0) mntorflags |= MNT_UNTRUSTED; if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0) mntorflags |= MNT_ACLS; if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) { mntorflags |= MNT_SNAPSHOT; /* * Once we have set the MNT_SNAPSHOT flag, do not * persist "snapshot" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "snapshot"); vfs_deleteopt(mp->mnt_opt, "snapshot"); } if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 && vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) { /* * Once we have set the restricted PID, do not * persist "fsckpid" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "fsckpid"); vfs_deleteopt(mp->mnt_opt, "fsckpid"); if (mp->mnt_flag & MNT_UPDATE) { if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } /* Set to -1 if we are done */ if (fsckpid == 0) fsckpid = -1; } if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) { if (mntorflags & MNT_ACLS) { vfs_mount_error(mp, "\"acls\" and \"nfsv4acls\" options " "are mutually exclusive"); return (EINVAL); } mntorflags |= MNT_NFS4ACLS; } MNT_ILOCK(mp); mp->mnt_flag |= mntorflags; MNT_IUNLOCK(mp); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; if (fsckpid == -1 && ump->um_fsckpid > 0) { if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 || (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) return (error); g_topology_lock(); /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); g_topology_unlock(); ump->um_fsckpid = 0; } if (fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * Flush any dirty data and suspend filesystem. */ if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); error = vfs_write_suspend_umnt(mp); if (error != 0) return (error); /* * Check for and optionally get rid of files open * for writing. 
*/ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (MOUNTEDSOFTDEP(mp)) { error = softdep_flushfiles(mp, flags, td); } else { error = ffs_flushfiles(mp, flags, td); } if (error) { vfs_write_resume(mp, 0); return (error); } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s Update error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) fs->fs_clean = 1; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { fs->fs_ronly = 0; fs->fs_clean = 0; vfs_write_resume(mp, 0); return (error); } if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); g_topology_lock(); /* * Drop our write and exclusive access. */ g_access(ump->um_cp, 0, -1, -1); g_topology_unlock(); fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); /* * Allow the writers to note that filesystem * is ro now. */ vfs_write_resume(mp, 0); } if ((mp->mnt_flag & MNT_RELOAD) && (error = ffs_reload(mp, td, 0)) != 0) return (error); if (fs->fs_ronly && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * If we are running a checker, do not allow upgrade. */ if (ump->um_fsckpid > 0) { vfs_mount_error(mp, "Active checker, cannot upgrade to write"); return (EINVAL); } /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { VOP_UNLOCK(devvp); return (error); } VOP_UNLOCK(devvp); fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if ((mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly " "dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s.%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck", (fs->fs_flags & FS_SUJ) == 0 ? "" : " Forced mount will invalidate" " journal contents"); return (EPERM); } } g_topology_lock(); /* * Request exclusive write access. */ error = g_access(ump->um_cp, 0, 1, 1); g_topology_unlock(); if (error) return (error); if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); error = vfs_write_suspend_umnt(mp); if (error != 0) return (error); fs->fs_ronly = 0; MNT_ILOCK(mp); saved_mnt_flag = MNT_RDONLY; if (MOUNTEDSOFTDEP(mp) && (mp->mnt_flag & MNT_ASYNC) != 0) saved_mnt_flag |= MNT_ASYNC; mp->mnt_flag &= ~saved_mnt_flag; MNT_IUNLOCK(mp); fs->fs_mtime = time_second; /* check to see if we need to start softdep */ if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= saved_mnt_flag; MNT_IUNLOCK(mp); vfs_write_resume(mp, 0); return (error); } fs->fs_clean = 0; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= saved_mnt_flag; MNT_IUNLOCK(mp); vfs_write_resume(mp, 0); return (error); } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); vfs_write_resume(mp, 0); } /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag in an update. * Softdep_mount() clears it in an initial mount * or ro->rw remount. */ if (MOUNTEDSOFTDEP(mp)) { /* XXX: Reset too late ? 
*/ MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_ASYNC; MNT_IUNLOCK(mp); } /* * Keep MNT_ACLS flag if it is stored in superblock. */ if ((fs->fs_flags & FS_ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * If this is a request from fsck to clean up the filesystem, * then allow the specified pid to proceed. */ if (fsckpid > 0) { if (ump->um_fsckpid != 0) { vfs_mount_error(mp, "Active checker already running on %s", fs->fs_fsmnt); return (EINVAL); } KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { vfs_mount_error(mp, "Checker activation failed on %s", fs->fs_fsmnt); return (error); } ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * If this is a snapshot request, take the snapshot. */ if (mp->mnt_flag & MNT_SNAPSHOT) return (ffs_snapshot(mp, fspec)); /* * Must not call namei() while owning busy ref. */ vfs_unbusy(mp); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); error = namei(&ndp); if ((mp->mnt_flag & MNT_UPDATE) != 0) { /* * Unmount does not start if MNT_UPDATE is set. Mount * update busies mp before setting MNT_UPDATE. We * must be able to retain our busy ref succesfully, * without sleep. */ error1 = vfs_busy(mp, MBF_NOWAIT); MPASS(error1 == 0); } if (error != 0) return (error); NDFREE(&ndp, NDF_ONLY_PNBUF); devvp = ndp.ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ accmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accmode |= VWRITE; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if (mp->mnt_flag & MNT_UPDATE) { /* * Update only * * If it's not the same vnode, or at least the same device * then it's not correct. */ if (devvp->v_rdev != ump->um_devvp->v_rdev) error = EINVAL; /* needs translation */ vput(devvp); if (error) return (error); } else { /* * New mount * * We need the name for the mount point (also used for * "last mounted on") copied in. If an error occurs, * the mount point is discarded by the upper level code. * Note that vfs_mount_alloc() populates f_mntonname for us. */ if ((error = ffs_mountfs(devvp, mp, td)) != 0) { vrele(devvp); return (error); } if (fsckpid > 0) { KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); ump = VFSTOUFS(mp); fs = ump->um_fs; g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { printf("WARNING: %s: Checker activation " "failed\n", fs->fs_fsmnt); } else { ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } } } vfs_mountedfrom(mp, fspec); return (0); } /* * Compatibility with old mount system call. 
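 * The shim below simply re-expresses the old-style struct ufs_args as
 * nmount(2) options ("from", "export") and hands them to
 * kernel_mount().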
*/ static int ffs_cmount(struct mntarg *ma, void *data, uint64_t flags) { struct ufs_args args; struct export_args exp; int error; if (data == NULL) return (EINVAL); error = copyin(data, &args, sizeof args); if (error) return (error); vfs_oexport_conv(&args.export, &exp); ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN); ma = mount_arg(ma, "export", &exp, sizeof(exp)); error = kernel_mount(ma, flags); return (error); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). If the 'force' flag * is 0, the filesystem must be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary * writers, if requested. * 6) invalidate all cached file data. * 7) re-read inode data for all active vnodes. */ int ffs_reload(struct mount *mp, struct thread *td, int flags) { struct vnode *vp, *mvp, *devvp; struct inode *ip; void *space; struct buf *bp; struct fs *fs, *newfs; struct ufsmount *ump; ufs2_daddr_t sblockloc; int i, blks, error; u_long size; int32_t *lp; ump = VFSTOUFS(mp); MNT_ILOCK(mp); if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) { MNT_IUNLOCK(mp); return (EINVAL); } MNT_IUNLOCK(mp); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if (vinvalbuf(devvp, 0, 0, 0) != 0) panic("ffs_reload: dirty1"); VOP_UNLOCK(devvp); /* * Step 2: re-read superblock from disk. */ fs = VFSTOUFS(mp)->um_fs; if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, NOCRED, &bp)) != 0) return (error); newfs = (struct fs *)bp->b_data; if ((newfs->fs_magic != FS_UFS1_MAGIC && newfs->fs_magic != FS_UFS2_MAGIC) || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; newfs->fs_contigdirs = fs->fs_contigdirs; newfs->fs_active = fs->fs_active; newfs->fs_ronly = fs->fs_ronly; sblockloc = fs->fs_sblockloc; bcopy(newfs, fs, (u_int)fs->fs_sbsize); brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: reload pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); /* * Step 3: re-read summary information from disk. */ size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); size += fs->fs_ncg * sizeof(u_int8_t); free(fs->fs_csp, M_UFSMNT); space = malloc(size, M_UFSMNT, M_WAITOK); fs->fs_csp = space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); } /* * We no longer know anything about clusters per cylinder group. 
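 *
 * (Concretely: the loop that follows optimistically seeds
 * fs_maxcluster with its upper bound, fs_contigsumsize, for every
 * cylinder group, and reattaches a zeroed fs_contigdirs array; the
 * allocator then corrects the per-group maxima as it revisits each
 * group.)
 *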
*/ if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; space = lp; } size = fs->fs_ncg * sizeof(u_int8_t); fs->fs_contigdirs = (u_int8_t *)space; bzero(fs->fs_contigdirs, size); if ((flags & FFSR_UNSUSPEND) != 0) { MNT_ILOCK(mp); mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); wakeup(&mp->mnt_flag); MNT_IUNLOCK(mp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Skip syncer vnode. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } /* * Step 4: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } if (vinvalbuf(vp, 0, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 5: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { vput(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } if ((error = ffs_load_inode(bp, ip, fs, ip->i_number)) != 0) { brelse(bp); vput(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } ip->i_effnlink = ip->i_nlink; brelse(bp); vput(vp); } return (0); } /* * Common code for mount and mountroot */ static int ffs_mountfs(devvp, mp, td) struct vnode *devvp; struct mount *mp; struct thread *td; { struct ufsmount *ump; struct fs *fs; struct cdev *dev; int error, i, len, ronly; struct ucred *cred; struct g_consumer *cp; struct mount *nmp; int candelete; off_t loc; fs = NULL; ump = NULL; cred = td ? td->td_ucred : NOCRED; ronly = (mp->mnt_flag & MNT_RDONLY) != 0; KASSERT(devvp->v_type == VCHR, ("reclaimed devvp")); dev = devvp->v_rdev; if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0, (uintptr_t)mp) == 0) { VOP_UNLOCK(devvp); return (EBUSY); } g_topology_lock(); error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1); g_topology_unlock(); if (error != 0) { atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); VOP_UNLOCK(devvp); return (error); } dev_ref(dev); devvp->v_bufobj.bo_ops = &ffs_ops; VOP_UNLOCK(devvp); if (dev->si_iosize_max != 0) mp->mnt_iosize_max = dev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) { error = EINVAL; vfs_mount_error(mp, "Invalid sectorsize %d for superblock size %d", cp->provider->sectorsize, SBLOCKSIZE); goto out; } /* fetch the superblock and summary information */ loc = STDSB; if ((mp->mnt_flag & MNT_ROOTFS) != 0) loc = STDSB_NOHASHFAIL; if ((error = ffs_sbget(devvp, &fs, loc, M_UFSMNT, ffs_use_bread)) != 0) goto out; /* none of these types of check-hashes are maintained by this kernel */ fs->fs_metackhash &= ~(CK_INDIR | CK_DIR); /* no support for any undefined flags */ fs->fs_flags &= FS_SUPPORTED; fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck.", (fs->fs_flags & FS_SUJ) == 0 ? 
"" : " Forced mount will invalidate journal contents"); error = EPERM; goto out; } if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && (mp->mnt_flag & MNT_FORCE)) { printf("WARNING: %s: lost blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: mount pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & FS_GJOURNAL) != 0) { #ifdef UFS_GJOURNAL /* * Get journal provider name. */ len = 1024; mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK); if (g_io_getattr("GJOURNAL::provider", cp, &len, mp->mnt_gjprovider) == 0) { mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len, M_UFSMNT, M_WAITOK); MNT_ILOCK(mp); mp->mnt_flag |= MNT_GJOURNAL; MNT_IUNLOCK(mp); } else { printf("WARNING: %s: GJOURNAL flag on fs " "but no gjournal provider below\n", mp->mnt_stat.f_mntonname); free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } #else printf("WARNING: %s: GJOURNAL flag on fs but no " "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname); #endif } else { mp->mnt_gjprovider = NULL; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); ump->um_cp = cp; ump->um_bo = &devvp->v_bufobj; ump->um_fs = fs; if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_fstype = UFS1; ump->um_balloc = ffs_balloc_ufs1; } else { ump->um_fstype = UFS2; ump->um_balloc = ffs_balloc_ufs2; } ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; ump->um_ifree = ffs_ifree; ump->um_rdonly = ffs_rdonly; ump->um_snapgone = ffs_snapgone; if ((mp->mnt_flag & MNT_UNTRUSTED) != 0) ump->um_check_blkno = ffs_check_blkno; else ump->um_check_blkno = NULL; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); ffs_oldfscompat_read(fs, ump, fs->fs_sblockloc); fs->fs_ronly = ronly; fs->fs_active = NULL; mp->mnt_data = ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; nmp = NULL; if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) { if (nmp) vfs_rel(nmp); vfs_getnewfsid(mp); } mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; MNT_IUNLOCK(mp); if ((fs->fs_flags & FS_MULTILABEL) != 0) { #ifdef MAC MNT_ILOCK(mp); mp->mnt_flag |= MNT_MULTILABEL; MNT_IUNLOCK(mp); #else printf("WARNING: %s: multilabel flag on fs but " "no MAC support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_NFS4ACLS) printf("WARNING: %s: ACLs flag on fs conflicts with " "\"nfsv4acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_NFS4ACLS; mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: ACLs flag on fs but no ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_ACLS) printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts " "with \"acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_ACLS; mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: NFSv4 ACLs flag on fs but no " "ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_TRIM) != 0) { len 
= sizeof(int); if (g_io_getattr("GEOM::candelete", cp, &len, &candelete) == 0) { if (candelete) ump->um_flags |= UM_CANDELETE; else printf("WARNING: %s: TRIM flag on fs but disk " "does not support TRIM\n", mp->mnt_stat.f_mntonname); } else { printf("WARNING: %s: TRIM flag on fs but disk does " "not confirm that it supports TRIM\n", mp->mnt_stat.f_mntonname); } if (((ump->um_flags) & UM_CANDELETE) != 0) { ump->um_trim_tq = taskqueue_create("trim", M_WAITOK, taskqueue_thread_enqueue, &ump->um_trim_tq); taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS, "%s trim", mp->mnt_stat.f_mntonname); ump->um_trimhash = hashinit(MAXTRIMIO, M_TRIM, &ump->um_trimlisthashsize); } } ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; #ifdef UFS_EXTATTR ufs_extattr_uepm_init(&ump->um_extattr); #endif /* * Set FS local "last mounted on" information (NULL pad) */ bzero(fs->fs_fsmnt, MAXMNTLEN); strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); mp->mnt_stat.f_iosize = fs->fs_bsize; if (mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } if (ronly == 0) { fs->fs_mtime = time_second; if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { ffs_flushfiles(mp, FORCECLOSE, td); goto out; } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * Initialize filesystem state information in mount struct. */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART /* * * Auto-starting does the following: * - check for /.attribute in the fs, and extattr_start if so * - for each file in .attribute, enable that file with * an attribute of the same name. * Not clear how to report errors -- probably eat them. * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". */ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ return (0); out: if (fs != NULL) { free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); } if (cp != NULL) { g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); } if (ump) { mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } free(ump, M_UFSMNT); mp->mnt_data = NULL; } atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); dev_rel(dev); return (error); } /* * A read function for use by filesystem-layer routines. */ static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size) { struct buf *bp; int error; KASSERT(*bufp == NULL, ("ffs_use_bread: non-NULL *bufp %p\n", *bufp)); *bufp = malloc(size, M_UFSMNT, M_WAITOK); if ((error = bread((struct vnode *)devfd, btodb(loc), size, NOCRED, &bp)) != 0) return (error); bcopy(bp->b_data, *bufp, size); bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); return (0); } #include static int bigcgs = 0; SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); /* * Sanity checks for loading old filesystem superblocks. * See ffs_oldfscompat_write below for unwound actions. * * XXX - Parts get retired eventually. 
* Unfortunately new bits get added. */ static void ffs_oldfscompat_read(fs, ump, sblockloc) struct fs *fs; struct ufsmount *ump; ufs2_daddr_t sblockloc; { off_t maxfilesize; /* * If not yet done, update fs_flags location and value of fs_sblockloc. */ if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { fs->fs_flags = fs->fs_old_flags; fs->fs_old_flags |= FS_FLAGS_UPDATED; fs->fs_sblockloc = sblockloc; } /* * If not yet done, update UFS1 superblock with new wider fields. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { fs->fs_maxbsize = fs->fs_bsize; fs->fs_time = fs->fs_old_time; fs->fs_size = fs->fs_old_size; fs->fs_dsize = fs->fs_old_dsize; fs->fs_csaddr = fs->fs_old_csaddr; fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_old_inodefmt < FS_44INODEFMT) { fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1; fs->fs_qbmask = ~fs->fs_bmask; fs->fs_qfmask = ~fs->fs_fmask; } if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_savedmaxfilesize = fs->fs_maxfilesize; maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1; if (fs->fs_maxfilesize > maxfilesize) fs->fs_maxfilesize = maxfilesize; } /* Compatibility for old filesystems */ if (fs->fs_avgfilesize <= 0) fs->fs_avgfilesize = AVFILESIZ; if (fs->fs_avgfpdir <= 0) fs->fs_avgfpdir = AFPDIR; if (bigcgs) { fs->fs_save_cgsize = fs->fs_cgsize; fs->fs_cgsize = fs->fs_bsize; } } /* * Unwinding superblock updates for old filesystems. * See ffs_oldfscompat_read above for details. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; { /* * Copy back UFS2 updated fields that UFS1 inspects. 
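 *
 * A concrete example of the clamping on the read side above: for UFS1,
 * ffs_oldfscompat_read() limits fs_maxfilesize to
 * (uint64_t)0x80000000 * fs_bsize - 1, i.e. 2^31 blocks.  With a
 * 16 KB block size, for instance, that is
 *
 *	2^31 * 2^14 - 1 = 2^45 - 1	(just under 32 TB)
 *
 * while filesystems predating the 4.4BSD inode format are clamped
 * harder, to 2^31 - 1 bytes.
 *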
*/ if (fs->fs_magic == FS_UFS1_MAGIC) { fs->fs_old_time = fs->fs_time; fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; fs->fs_maxfilesize = ump->um_savedmaxfilesize; } if (bigcgs) { fs->fs_cgsize = fs->fs_save_cgsize; fs->fs_save_cgsize = 0; } } /* * unmount system call */ static int ffs_unmount(mp, mntflags) struct mount *mp; int mntflags; { struct thread *td; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, flags, susp; #ifdef UFS_EXTATTR int e_restart; #endif flags = 0; td = curthread; fs = ump->um_fs; susp = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; susp = fs->fs_ronly == 0; } #ifdef UFS_EXTATTR if ((error = ufs_extattr_stop(mp, td))) { if (error != EOPNOTSUPP) printf("WARNING: unmount %s: ufs_extattr_stop " "returned errno %d\n", mp->mnt_stat.f_mntonname, error); e_restart = 0; } else { ufs_extattr_uepm_destroy(&ump->um_extattr); e_restart = 1; } #endif if (susp) { error = vfs_write_suspend_umnt(mp); if (error != 0) goto fail1; } if (MOUNTEDSOFTDEP(mp)) error = softdep_flushfiles(mp, flags, td); else error = ffs_flushfiles(mp, flags, td); if (error != 0 && error != ENXIO) goto fail; UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: unmount %s: pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT, 0); if (error && error != ENXIO) { fs->fs_clean = 0; goto fail; } } if (susp) vfs_write_resume(mp, VR_START_WRITE); if (ump->um_trim_tq != NULL) { while (ump->um_trim_inflight != 0) pause("ufsutr", hz); taskqueue_drain_all(ump->um_trim_tq); taskqueue_free(ump->um_trim_tq); free (ump->um_trimhash, M_TRIM); } g_topology_lock(); if (ump->um_fsckpid > 0) { /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); ump->um_fsckpid = 0; } g_vfs_close(ump->um_cp); g_topology_unlock(); atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0); vrele(ump->um_devvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); if (td->td_su == mp) { td->td_su = NULL; vfs_rel(mp); } return (error); fail: if (susp) vfs_write_resume(mp, VR_START_WRITE); fail1: #ifdef UFS_EXTATTR if (e_restart) { ufs_extattr_uepm_init(&ump->um_extattr); #ifdef UFS_EXTATTR_AUTOSTART (void) ufs_extattr_autostart(mp, td); #endif } #endif return (error); } /* * Flush out all the files in a filesystem. 
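 *
 * An ordering note on ffs_unmount() above, as the code implements it:
 * a forced unmount of a writable volume first suspends writes, then
 * flushes vnodes (via softdep_flushfiles() when soft updates are
 * active), writes the superblock back with fs_clean set only if
 * FS_UNCLEAN and FS_NEEDSFSCK are clear, resumes writes, drains the
 * TRIM taskqueue, and only then closes the GEOM consumer and frees
 * the ufsmount.  An error before the superblock write unwinds through
 * the fail/fail1 labels, restarting extended attributes if they had
 * been stopped.
 *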
*/ int ffs_flushfiles(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct ufsmount *ump; int qerror, error; ump = VFSTOUFS(mp); qerror = 0; #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, 0, SKIPSYSTEM|flags, td); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { error = quotaoff(td, mp, i); if (error != 0) { if ((flags & EARLYFLUSH) == 0) return (error); else qerror = error; } } /* * Here we fall through to vflush again to ensure that * we have gotten rid of all the system vnodes, unless * quotas must not be closed. */ } #endif ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0) return (error); ffs_snapshot_unmount(mp); flags |= FORCECLOSE; /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } /* * Do not close system files if quotas were not closed, to be * able to sync the remaining dquots. The freeblks softupdate * workitems might hold a reference on a dquot, preventing * quotaoff() from completing. Next round of * softdep_flushworklist() iteration should process the * blockers, allowing the next run of quotaoff() to finally * flush held dquots. * * Otherwise, flush all the files. */ if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0) return (error); /* * Flush filesystem metadata. */ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td); VOP_UNLOCK(ump->um_devvp); return (error); } /* * Get filesystem statistics. */ static int ffs_statfs(mp, sbp) struct mount *mp; struct statfs *sbp; { struct ufsmount *ump; struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) panic("ffs_statfs"); sbp->f_version = STATFS_VERSION; sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; UFS_LOCK(ump); sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_bavail = freespace(fs, fs->fs_minfree) + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; UFS_UNLOCK(ump); sbp->f_namemax = UFS_MAXNAMLEN; return (0); } static bool sync_doupdate(struct inode *ip) { return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) != 0); } static int ffs_sync_lazy_filter(struct vnode *vp, void *arg __unused) { struct inode *ip; /* * Flags are safe to access because ->v_data invalidation * is held off by listmtx. */ if (vp->v_type == VNON) return (false); ip = VTOI(vp); if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) return (false); return (true); } /* * For a lazy sync, we only care about access times, quotas and the * superblock. Other filesystem changes are already converted to * cylinder group blocks or inode blocks updates and are written to * disk by syncer. 
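 *
 * Note in passing that the space counts ffs_statfs() reports above are
 * in fs_fsize fragments, with pending (not yet released) blocks
 * credited back to f_bfree and f_bavail.  A minimal userland sketch of
 * reading them, assuming only the standard statfs(2) call
 * (show_space() is an invented name for this example):
 *
 *	#include <sys/param.h>
 *	#include <sys/mount.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	void
 *	show_space(const char *path)
 *	{
 *		struct statfs sfs;
 *
 *		if (statfs(path, &sfs) == 0)
 *			printf("%ju of %ju fragments free, %ju bytes each\n",
 *			    (uintmax_t)sfs.f_bfree, (uintmax_t)sfs.f_blocks,
 *			    (uintmax_t)sfs.f_bsize);
 *	}
 *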
*/ static int ffs_sync_lazy(mp) struct mount *mp; { struct vnode *mvp, *vp; struct inode *ip; struct thread *td; int allerror, error; allerror = 0; td = curthread; if ((mp->mnt_flag & MNT_NOATIME) != 0) { #ifdef QUOTA qsync(mp); #endif goto sbupdate; } MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, ffs_sync_lazy_filter, NULL) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); /* * The IN_ACCESS flag is converted to IN_MODIFIED by * ufs_close() and ufs_getattr() by the calls to * ufs_itimes_locked(), without subsequent UFS_UPDATE(). * Test also all the other timestamp flags too, to pick up * any other cases that could be missed. */ if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td)) != 0) continue; #ifdef QUOTA qsyncvp(vp); #endif if (sync_doupdate(ip)) error = ffs_update(vp, 0); if (error != 0) allerror = error; vput(vp); } sbupdate: if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 && (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0) allerror = error; return (allerror); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked busy using * vfs_busy(). */ static int ffs_sync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *mvp, *vp, *devvp; struct thread *td; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, count, lockreq, allerror = 0; int suspend; int suspended; int secondary_writes; int secondary_accwrites; int softdep_deps; int softdep_accdeps; struct bufobj *bo; suspend = 0; suspended = 0; td = curthread; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0) panic("%s: ffs_sync: modification on read-only filesystem", fs->fs_fsmnt); if (waitfor == MNT_LAZY) { if (!rebooting) return (ffs_sync_lazy(mp)); waitfor = MNT_NOWAIT; } /* * Write back each (modified) inode. */ lockreq = LK_EXCLUSIVE | LK_NOWAIT; if (waitfor == MNT_SUSPEND) { suspend = 1; waitfor = MNT_WAIT; } if (waitfor == MNT_WAIT) lockreq = LK_EXCLUSIVE; lockreq |= LK_INTERLOCK | LK_SLEEPFAIL; loop: /* Grab snapshot of secondary write counts */ MNT_ILOCK(mp); secondary_writes = mp->mnt_secondary_writes; secondary_accwrites = mp->mnt_secondary_accwrites; MNT_IUNLOCK(mp); /* Grab snapshot of softdep dependency counts */ softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps); MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Depend on the vnode interlock to keep things stable enough * for a quick test. Since there might be hundreds of * thousands of vnodes, we cannot afford even a subroutine * call unless there's a good chance that we have work to do. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && vp->v_bufobj.bo_dirty.bv_cnt == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, lockreq, td)) != 0) { if (error == ENOENT || error == ENOLCK) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } #ifdef QUOTA qsyncvp(vp); #endif if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0) allerror = error; vput(vp); } /* * Force stale filesystem control information to be flushed. 
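 *
 * A note on the counters snapshotted at the top of the loop above:
 * ffs_sync() records mnt_secondary_writes and the soft updates
 * dependency counts before scanning the vnodes, and
 * softdep_check_suspend() compares them afterwards.  Any change means
 * new work arrived during the scan, so in the MNT_SUSPEND case the
 * code simply goes around again until one full scan completes with
 * the counts unchanged.
 *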
*/ if (waitfor == MNT_WAIT || rebooting) { if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) allerror = error; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) goto loop; } devvp = ump->um_devvp; bo = &devvp->v_bufobj; BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(devvp, waitfor, td); VOP_UNLOCK(devvp); if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN)) error = ffs_sbupdate(ump, waitfor, 0); if (error != 0) allerror = error; if (allerror == 0 && waitfor == MNT_WAIT) goto loop; } else if (suspend != 0) { if (softdep_check_suspend(mp, devvp, softdep_deps, softdep_accdeps, secondary_writes, secondary_accwrites) != 0) { MNT_IUNLOCK(mp); goto loop; /* More work needed */ } mtx_assert(MNT_MTX(mp), MA_OWNED); mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; MNT_IUNLOCK(mp); suspended = 1; } else BO_UNLOCK(bo); /* * Write back modified superblock. */ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor, suspended)) != 0) allerror = error; return (allerror); } int ffs_vget(mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { return (ffs_vgetf(mp, ino, flags, vpp, 0)); } int ffs_vgetf(mp, ino, flags, vpp, ffs_flags) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; int ffs_flags; { struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; int error; MPASS((ffs_flags & FFSV_REPLACE) == 0 || (flags & LK_EXCLUSIVE) != 0); error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL); if (error != 0) return (error); if (*vpp != NULL) { if ((ffs_flags & FFSV_REPLACE) == 0) return (0); vgone(*vpp); vput(*vpp); } /* * We must promote to an exclusive lock for vnode creation. This * can happen if lookup is passed LOCKSHARED. */ if ((flags & LK_TYPE_MASK) == LK_SHARED) { flags &= ~LK_TYPE_MASK; flags |= LK_EXCLUSIVE; } /* * We do not lock vnode creation as it is believed to be too * expensive for such rare case as simultaneous creation of vnode * for same ino by different processes. We just allow them to race * and check later to decide who wins. Let the race begin! */ ump = VFSTOUFS(mp); fs = ump->um_fs; ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO); /* Allocate a new vnode/inode. */ error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ? &ffs_vnodeops1 : &ffs_vnodeops2, &vp); if (error) { *vpp = NULL; uma_zfree(uma_inode, ip); return (error); } /* * FFS supports recursive locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); VN_LOCK_AREC(vp); vp->v_data = ip; vp->v_bufobj.bo_bsize = fs->fs_bsize; ip->i_vnode = vp; ip->i_ump = ump; ip->i_number = ino; ip->i_ea_refs = 0; ip->i_nextclustercg = -1; ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2; ip->i_mode = 0; /* ensure error cases below throw away vnode */ #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif if (ffs_flags & FFSV_FORCEINSMQ) vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) { uma_zfree(uma_inode, ip); *vpp = NULL; return (error); } vp->v_vflag &= ~VV_FORCEINSMQ; error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL); if (error != 0) return (error); if (*vpp != NULL) { /* * Calls from ffs_valloc() (i.e. FFSV_REPLACE set) * operate on empty inode, which must not be found by * other threads until fully filled. Vnode for empty * inode must be not re-inserted on the hash by other * thread, after removal by us at the beginning. 
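 *
 * Restating the race handling for clarity: creation is optimistic.
 * Each racing thread builds its own inode and vnode and lets
 * vfs_hash_insert() arbitrate; a loser's vnode is discarded inside
 * vfs_hash_insert() and *vpp comes back holding the winner's vnode,
 * already locked.  Only an FFSV_REPLACE caller, which purged any
 * pre-existing vnode up front, may assert that no winner can exist.
 *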
*/ MPASS((ffs_flags & FFSV_REPLACE) == 0); return (0); } /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ + vgone(vp); vput(vp); *vpp = NULL; return (error); } if (I_IS_UFS1(ip)) ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); else ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); if ((error = ffs_load_inode(bp, ip, fs, ino)) != 0) { bqrelse(bp); + vgone(vp); vput(vp); *vpp = NULL; return (error); } if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2, &vp); if (error) { + vgone(vp); vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization. */ if (vp->v_type != VFIFO) { /* FFS supports shared locking for all files except fifos. */ VN_LOCK_ASHARE(vp); } /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { while (ip->i_gen == 0) ip->i_gen = arc4random(); if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { UFS_INODE_SET_FLAG(ip, IN_MODIFIED); DIP_SET(ip, i_gen, ip->i_gen); } } #ifdef MAC if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { /* * If this vnode is already allocated, and we're running * multi-label, attempt to perform a label association * from the extended attributes on the inode. */ error = mac_vnode_associate_extattr(mp, vp); if (error) { /* ufs_inactive will release ip->i_devvp ref. */ + vgone(vp); vput(vp); *vpp = NULL; return (error); } } #endif *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - for UFS2 check that the inode number is initialized * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ffs_fhtovp(mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { struct ufid *ufhp; struct ufsmount *ump; struct fs *fs; struct cg *cgp; struct buf *bp; ino_t ino; u_int cg; int error; ufhp = (struct ufid *)fhp; ino = ufhp->ufid_ino; ump = VFSTOUFS(mp); fs = ump->um_fs; if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); /* * Need to check if inode is initialized because UFS2 does lazy * initialization and nfs_fhtovp can offer arbitrary inode numbers. */ if (fs->fs_magic != FS_UFS2_MAGIC) return (ufs_fhtovp(mp, ufhp, flags, vpp)); cg = ino_to_cg(fs, ino); if ((error = ffs_getcg(fs, ump->um_devvp, cg, 0, &bp, &cgp)) != 0) return (error); if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) { brelse(bp); return (ESTALE); } brelse(bp); return (ufs_fhtovp(mp, ufhp, flags, vpp)); } /* * Initialize the filesystem. */ static int ffs_init(vfsp) struct vfsconf *vfsp; { ffs_susp_initialize(); softdep_initialize(); return (ufs_init(vfsp)); } /* * Undo the work of ffs_init(). 
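 *
 * A note on the vgone() calls in the error paths of ffs_vgetf()
 * above: until vgone() runs, the partially initialized vnode is
 * still reachable through the vfs hash, so unwinding with a bare
 * vput() could let a concurrent lookup find an inode whose contents
 * were never loaded.  vgone() reclaims the vnode, removing it from
 * the hash, before the vput() releases it.
 *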
*/ static int ffs_uninit(vfsp) struct vfsconf *vfsp; { int ret; ret = ufs_uninit(vfsp); softdep_uninitialize(); ffs_susp_uninitialize(); return (ret); } /* * Structure used to pass information from ffs_sbupdate to its * helper routine ffs_use_bwrite. */ struct devfd { struct ufsmount *ump; struct buf *sbbp; int waitfor; int suspended; int error; }; /* * Write a superblock and associated information back to disk. */ int ffs_sbupdate(ump, waitfor, suspended) struct ufsmount *ump; int waitfor; int suspended; { struct fs *fs; struct buf *sbbp; struct devfd devfd; fs = ump->um_fs; if (fs->fs_ronly == 1 && (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0) panic("ffs_sbupdate: write read-only filesystem"); /* * We use the superblock's buf to serialize calls to ffs_sbupdate(). */ sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); /* * Initialize info needed for write function. */ devfd.ump = ump; devfd.sbbp = sbbp; devfd.waitfor = waitfor; devfd.suspended = suspended; devfd.error = 0; return (ffs_sbput(&devfd, fs, fs->fs_sblockloc, ffs_use_bwrite)); } /* * Write function for use by filesystem-layer routines. */ static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size) { struct devfd *devfdp; struct ufsmount *ump; struct buf *bp; struct fs *fs; int error; devfdp = devfd; ump = devfdp->ump; fs = ump->um_fs; /* * Writing the superblock summary information. */ if (loc != fs->fs_sblockloc) { bp = getblk(ump->um_devvp, btodb(loc), size, 0, 0, 0); bcopy(buf, bp->b_data, (u_int)size); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (0); } /* * Writing the superblock itself. We need to do special checks for it. */ bp = devfdp->sbbp; if (devfdp->error != 0) { brelse(bp); return (devfdp->error); } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); fs->fs_sblockloc = SBLOCK_UFS1; } if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); fs->fs_sblockloc = SBLOCK_UFS2; } if (MOUNTEDSOFTDEP(ump->um_mountp)) softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); fs = (struct fs *)bp->b_data; ffs_oldfscompat_write(fs, ump); /* * Because we may have made changes to the superblock, we need to * recompute its check-hash. 
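 *
 * The ffs_sbput()/ffs_sbget() callbacks seen here and in
 * ffs_use_bread() earlier share a simple contract: they receive an
 * opaque devfd cookie, a byte offset and a size, and the read side
 * must allocate *bufp itself.  A userland-flavoured sketch of a
 * conforming reader, assuming an int file descriptor as the cookie
 * (read_sb() and the use of pread(2) are illustrative assumptions,
 * not part of this change):
 *
 *	#include <errno.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	static int
 *	read_sb(void *devfd, off_t loc, void **bufp, int size)
 *	{
 *		int fd = *(int *)devfd;
 *
 *		if ((*bufp = malloc(size)) == NULL)
 *			return (ENOMEM);
 *		if (pread(fd, *bufp, size, loc) != size)
 *			return (EIO);
 *		return (0);
 *	}
 *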
fs->fs_ckhash = ffs_calc_sbhash(fs); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (devfdp->error); } static int ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname) { #ifdef UFS_EXTATTR return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #else return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #endif } static void ffs_ifree(struct ufsmount *ump, struct inode *ip) { if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) uma_zfree(uma_ufs1, ip->i_din1); else if (ip->i_din2 != NULL) uma_zfree(uma_ufs2, ip->i_din2); uma_zfree(uma_inode, ip); } static int dobkgrdwrite = 1; SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, "Do background writes (honoring the BV_BKGRDWRITE flag)?"); /* * Complete a background write started from bwrite. */ static void ffs_backgroundwritedone(struct buf *bp) { struct bufobj *bufobj; struct buf *origbp; /* * Find the original buffer that we are writing. */ bufobj = bp->b_bufobj; BO_LOCK(bufobj); if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL) panic("backgroundwritedone: lost buffer"); /* * We should mark the cylinder group buffer origbp as * dirty, to not lose the failed write. */ if ((bp->b_ioflags & BIO_ERROR) != 0) origbp->b_vflags |= BV_BKGRDERR; BO_UNLOCK(bufobj); /* * Process dependencies then return any unfinished ones. */ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0) buf_complete(bp); #ifdef SOFTUPDATES if (!LIST_EMPTY(&bp->b_dep)) softdep_move_dependencies(bp, origbp); #endif /* * This buffer is marked B_NOCACHE so when it is released * by biodone it will be tossed. */ bp->b_flags |= B_NOCACHE; bp->b_flags &= ~B_CACHE; pbrelvp(bp); /* * Prevent brelse() from trying to keep and re-dirtying bp on * errors. It causes b_bufobj dereference in * bdirty()/reassignbuf(), and b_bufobj was cleared in * pbrelvp() above. */ if ((bp->b_ioflags & BIO_ERROR) != 0) bp->b_flags |= B_INVAL; bufdone(bp); BO_LOCK(bufobj); /* * Clear the BV_BKGRDINPROG flag in the original buffer * and awaken it if it is waiting for the write to complete. * If BV_BKGRDINPROG is not set in the original buffer it must * have been released and re-instantiated - which is not legal. */ KASSERT((origbp->b_vflags & BV_BKGRDINPROG), ("backgroundwritedone: lost buffer2")); origbp->b_vflags &= ~BV_BKGRDINPROG; if (origbp->b_vflags & BV_BKGRDWAIT) { origbp->b_vflags &= ~BV_BKGRDWAIT; wakeup(&origbp->b_xflags); } BO_UNLOCK(bufobj); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. */ static int ffs_bufwrite(struct buf *bp) { struct buf *newbp; struct cg *cgp; CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } if (!BUF_ISLOCKED(bp)) panic("bufwrite: buffer is not busy???"); /* * If a background write is already in progress, delay * writing this block if it is asynchronous. Otherwise * wait for the background write to complete.
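 *
 * The larger picture, for readers landing here: a cylinder group
 * buffer marked BX_BKGRDWRITE is not overwritten in place.  Instead,
 * ffs_bufwrite() below copies it into a fresh anonymous buffer, marks
 * the original BV_BKGRDINPROG, and writes the copy, leaving the
 * original available for further updates.  When the copy's I/O
 * finishes, ffs_backgroundwritedone() above clears BV_BKGRDINPROG,
 * migrates any unfinished soft updates dependencies back to the
 * original, and flags the original BV_BKGRDERR on error so the failed
 * write is not lost.
 *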
*/ BO_LOCK(bp->b_bufobj); if (bp->b_vflags & BV_BKGRDINPROG) { if (bp->b_flags & B_ASYNC) { BO_UNLOCK(bp->b_bufobj); bdwrite(bp); return (0); } bp->b_vflags |= BV_BKGRDWAIT; msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO, "bwrbg", 0); if (bp->b_vflags & BV_BKGRDINPROG) panic("bufwrite: still writing"); } bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the * copy so as to leave this buffer ready for further use. * * This optimization eats a lot of memory. If we have a page * or buffer shortfall we can't do it. */ if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC) && !vm_page_count_severe() && !buf_dirty_count_severe()) { KASSERT(bp->b_iodone == NULL, ("bufwrite: needs chained iodone (%p)", bp->b_iodone)); /* get a new block */ newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD); if (newbp == NULL) goto normal_write; KASSERT(buf_mapped(bp), ("Unmapped cg")); memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); BO_LOCK(bp->b_bufobj); bp->b_vflags |= BV_BKGRDINPROG; BO_UNLOCK(bp->b_bufobj); newbp->b_xflags |= (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER; newbp->b_lblkno = bp->b_lblkno; newbp->b_blkno = bp->b_blkno; newbp->b_offset = bp->b_offset; newbp->b_iodone = ffs_backgroundwritedone; newbp->b_flags |= B_ASYNC; newbp->b_flags &= ~B_INVAL; pbgetvp(bp->b_vp, newbp); #ifdef SOFTUPDATES /* * Move over the dependencies. If there are rollbacks, * leave the parent buffer dirtied as it will need to * be written again. */ if (LIST_EMPTY(&bp->b_dep) || softdep_move_dependencies(bp, newbp) == 0) bundirty(bp); #else bundirty(bp); #endif /* * Initiate write on the copy, release the original. The * BKGRDINPROG flag prevents it from going away until * the background write completes. We have to recalculate * its check hash in case the buffer gets freed and then * reconstituted from the buffer cache during a later read. */ if ((bp->b_xflags & BX_CYLGRP) != 0) { cgp = (struct cg *)bp->b_data; cgp->cg_ckhash = 0; cgp->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); } bqrelse(bp); bp = newbp; } else /* Mark the buffer clean */ bundirty(bp); /* Let the normal bufwrite do the rest for us */ normal_write: /* * If we are writing a cylinder group, update its time. 
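 *
 * The check-hash recomputation just above follows the pattern used
 * for every self-checking FFS structure: zero the stored hash, CRC
 * the whole buffer, store the result.  Schematically (these two lines
 * are quoted from the code, not a new API):
 *
 *	cgp->cg_ckhash = 0;
 *	cgp->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
 *
 * Zeroing first is what makes verification reproducible: a reader
 * repeats the computation with the hash field cleared and compares
 * the results.
 *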
*/ if ((bp->b_xflags & BX_CYLGRP) != 0) { cgp = (struct cg *)bp->b_data; cgp->cg_old_time = cgp->cg_time = time_second; } return (bufwrite(bp)); } static void ffs_geom_strategy(struct bufobj *bo, struct buf *bp) { struct vnode *vp; struct buf *tbp; int error, nocopy; vp = bo2vnode(bo); if (bp->b_iocmd == BIO_WRITE) { if ((bp->b_flags & B_VALIDSUSPWRT) == 0 && bp->b_vp != NULL && bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("ffs_geom_strategy: bad I/O"); nocopy = bp->b_flags & B_NOCOPY; bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY); if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 && vp->v_rdev->si_snapdata != NULL) { if ((bp->b_flags & B_CLUSTER) != 0) { runningbufwakeup(bp); TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { error = ffs_copyonwrite(vp, tbp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } bp->b_runningbufspace = bp->b_bufsize; atomic_add_long(&runningbufspace, bp->b_runningbufspace); } else { error = ffs_copyonwrite(vp, bp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } } #ifdef SOFTUPDATES if ((bp->b_flags & B_CLUSTER) != 0) { TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { if (!LIST_EMPTY(&tbp->b_dep)) buf_start(tbp); } } else { if (!LIST_EMPTY(&bp->b_dep)) buf_start(bp); } #endif /* * Check for metadata that needs check-hashes and update them. */ switch (bp->b_xflags & BX_FSPRIV) { case BX_CYLGRP: ((struct cg *)bp->b_data)->cg_ckhash = 0; ((struct cg *)bp->b_data)->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); break; case BX_SUPERBLOCK: case BX_INODE: case BX_INDIR: case BX_DIR: printf("Check-hash write is unimplemented!!!\n"); break; case 0: break; default: printf("multiple buffer types 0x%b\n", (u_int)(bp->b_xflags & BX_FSPRIV), PRINT_UFS_BUF_XFLAGS); break; } } g_vfs_strategy(bo, bp); } int ffs_own_mount(const struct mount *mp) { if (mp->mnt_op == &ufs_vfsops) return (1); return (0); } #ifdef DDB #ifdef SOFTUPDATES /* defined in ffs_softdep.c */ extern void db_print_ffs(struct ufsmount *ump); DB_SHOW_COMMAND(ffs, db_show_ffs) { struct mount *mp; struct ufsmount *ump; if (have_addr) { ump = VFSTOUFS((struct mount *)addr); db_print_ffs(ump); return; } TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name)) db_print_ffs(VFSTOUFS(mp)); } } #endif /* SOFTUPDATES */ #endif /* DDB */ Index: projects/clang1000-import/sys/ufs/ufs/ufs_vnops.c =================================================================== --- projects/clang1000-import/sys/ufs/ufs/ufs_vnops.c (revision 357178) +++ projects/clang1000-import/sys/ufs/ufs/ufs_vnops.c (revision 357179) @@ -1,2802 +1,2807 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_suiddir.h" #include "opt_ufs.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX */ #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #ifdef UFS_GJOURNAL #include FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); #endif #ifdef QUOTA FEATURE(ufs_quota, "UFS disk quotas support"); FEATURE(ufs_quota64, "64bit UFS disk quotas support"); #endif #ifdef SUIDDIR FEATURE(suiddir, "Give all new files in directory the same ownership as the directory"); #endif #include static vop_accessx_t ufs_accessx; static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); static vop_close_t ufs_close; static vop_create_t ufs_create; static vop_getattr_t ufs_getattr; static vop_ioctl_t ufs_ioctl; static vop_link_t ufs_link; static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *); static vop_markatime_t ufs_markatime; static vop_mkdir_t ufs_mkdir; static vop_mknod_t ufs_mknod; static vop_open_t ufs_open; static vop_pathconf_t ufs_pathconf; static vop_print_t ufs_print; static vop_readlink_t ufs_readlink; static vop_remove_t ufs_remove; static vop_rename_t ufs_rename; static vop_rmdir_t ufs_rmdir; static vop_setattr_t ufs_setattr; static vop_strategy_t ufs_strategy; static vop_symlink_t ufs_symlink; static vop_whiteout_t ufs_whiteout; static vop_close_t ufsfifo_close; static vop_kqfilter_t ufsfifo_kqfilter; SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); /* * A virgin directory (no blushing please). */ static struct dirtemplate mastertemplate = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static struct odirtemplate omastertemplate = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." 
}; static void ufs_itimes_locked(struct vnode *vp) { struct inode *ip; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); ip = VTOI(vp); if (UFS_RDONLY(ip)) goto out; if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) UFS_INODE_SET_FLAG(ip, IN_LAZYMOD); else if (((vp->v_mount->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || (ip->i_flag & (IN_CHANGE | IN_UPDATE))) UFS_INODE_SET_FLAG(ip, IN_MODIFIED); else if (ip->i_flag & IN_ACCESS) UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS); vfs_timestamp(&ts); if (ip->i_flag & IN_ACCESS) { DIP_SET(ip, i_atime, ts.tv_sec); DIP_SET(ip, i_atimensec, ts.tv_nsec); } if (ip->i_flag & IN_UPDATE) { DIP_SET(ip, i_mtime, ts.tv_sec); DIP_SET(ip, i_mtimensec, ts.tv_nsec); } if (ip->i_flag & IN_CHANGE) { DIP_SET(ip, i_ctime, ts.tv_sec); DIP_SET(ip, i_ctimensec, ts.tv_nsec); DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); } out: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void ufs_itimes(struct vnode *vp) { VI_LOCK(vp); ufs_itimes_locked(vp); VI_UNLOCK(vp); } /* * Create a regular file */ static int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); if (error != 0) return (error); if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ static int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; ino_t ino; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); if (error) return (error); ip = VTOI(*vpp); UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ DIP_SET(ip, i_rdev, vap->va_rdev); } /* * Remove inode, then reload it through VFS_VGET so it is * checked to see if it is an alias of an existing entry in * the inode cache. XXX I don't believe this is necessary now. */ (*vpp)->v_type = VNON; ino = ip->i_number; /* Save this before vgone() invalidates ip. */ vgone(*vpp); vput(*vpp); error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); if (error) { *vpp = NULL; return (error); } return (0); } /* * Open called. */ /* ARGSUSED */ static int ufs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip; if (vp->v_type == VCHR || vp->v_type == VBLK) return (EOPNOTSUPP); ip = VTOI(vp); /* * Files marked append-only must be opened for appending. */ if ((ip->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); return (0); } /* * Close called. * * Update the times on the inode. 
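 *
 * For reference, ufs_itimes_locked() above maps the in-core flags to
 * on-disk timestamps as follows: IN_ACCESS updates di_atime,
 * IN_UPDATE updates di_mtime, and IN_CHANGE updates di_ctime and
 * bumps di_modrev.  Whether the inode is dirtied immediately
 * (IN_MODIFIED) or lazily (IN_LAZYMOD, IN_LAZYACCESS) depends on the
 * vnode type and on whether the filesystem is suspended.
 *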
*/ /* ARGSUSED */ static int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (0); } static int ufs_accessx(ap) struct vop_accessx_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); accmode_t accmode = ap->a_accmode; int error; #ifdef UFS_ACL struct acl *acl; acl_type_t type; #endif /* * Disallow write attempts on read-only filesystems; * unless the file is a socket, fifo, or a block or * character device resident on the filesystem. */ if (accmode & VMODIFY_PERMS) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA /* * Inode is accounted in the quotas only if struct * dquot is attached to it. VOP_ACCESS() is called * from vn_open_cred() and provides a convenient * point to call getinoquota(). The lock mode is * exclusive when the file is opening for write. */ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = getinoquota(ip); if (error != 0) return (error); } #endif break; default: break; } } /* * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" * permits the owner of the file to remove the IMMUTABLE flag. */ if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) return (EPERM); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) type = ACL_TYPE_NFS4; else type = ACL_TYPE_ACCESS; acl = acl_alloc(M_WAITOK); if (type == ACL_TYPE_NFS4) error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); else error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); switch (error) { case 0: if (type == ACL_TYPE_NFS4) { error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred, NULL); } else { error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred, NULL); } break; default: if (error != EOPNOTSUPP) printf( "ufs_accessx(): Error retrieving ACL on object (%d).\n", error); /* * XXX: Fall back until debugged. Should * eventually possibly log an error, and return * EPERM for safety. 
*/ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred, NULL); } acl_free(acl); return (error); } #endif /* !UFS_ACL */ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred, NULL); return (error); } /* ARGSUSED */ static int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct vattr *vap = ap->a_vap; VI_LOCK(vp); ufs_itimes_locked(vp); if (I_IS_UFS1(ip)) { vap->va_atime.tv_sec = ip->i_din1->di_atime; vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; } else { vap->va_atime.tv_sec = ip->i_din2->di_atime; vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; } VI_UNLOCK(vp); /* * Copy from inode table */ vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; if (I_IS_UFS1(ip)) { vap->va_rdev = ip->i_din1->di_rdev; vap->va_size = ip->i_din1->di_size; vap->va_mtime.tv_sec = ip->i_din1->di_mtime; vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din1->di_ctime; vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); vap->va_filerev = ip->i_din1->di_modrev; } else { vap->va_rdev = ip->i_din2->di_rdev; vap->va_size = ip->i_din2->di_size; vap->va_mtime.tv_sec = ip->i_din2->di_mtime; vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din2->di_ctime; vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); vap->va_filerev = ip->i_din2->di_modrev; } vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_type = IFTOVT(ip->i_mode); return (0); } /* * Set attribute vnode op. called from several syscalls */ static int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct thread *td = curthread; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | UF_SPARSE | UF_SYSTEM)) != 0) return (EOPNOTSUPP); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. 
* Privileged jail processes behave like privileged non-jail * processes if the PR_ALLOW_CHFLAGS permission bit is set; * otherwise, they behave like unprivileged processes. */ if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } /* The snapshot flag cannot be toggled. */ if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) return (EPERM); } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) return (EPERM); } ip->i_flags = vap->va_flags; DIP_SET(ip, i_flags, vap->va_flags); UFS_INODE_SET_FLAG(ip, IN_CHANGE); error = UFS_UPDATE(vp, 0); if (ip->i_flags & (IMMUTABLE | APPEND)) return (error); } /* * If immutable or append, no one can change any of its attributes * except the ones already handled (in some cases, file flags * including the immutability flags themselves for the superuser). */ if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, td)) != 0) return (error); } if (vap->va_size != VNOVAL) { /* * XXX most of the following special cases should be in * callers instead of in N filesystems. The VDIR check * mostly already is. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: /* * Truncation should have an effect in these cases. * Disallow it if the filesystem is read-only or * the file is being snapshotted. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); break; default: /* * According to POSIX, the result is unspecified * for file types other than regular files, * directories and shared memory objects. We * don't support shared memory objects in the file * system, and have dubious support for truncating * symlinks. Just ignore the request in other cases. */ return (0); } if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | ((vap->va_vaflags & VA_SYNC) != 0 ? 
IO_SYNC : 0), cred)) != 0) return (error); } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); error = vn_utimes_perm(vp, vap, cred, td); if (error != 0) return (error); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED); if (vap->va_atime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_ACCESS; DIP_SET(ip, i_atime, vap->va_atime.tv_sec); DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); } if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_UPDATE; DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); } if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; } error = UFS_UPDATE(vp, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) return (EPERM); error = ufs_chmod(vp, (int)vap->va_mode, cred, td); } return (error); } #ifdef UFS_ACL static int ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, int file_owner_id, struct ucred *cred, struct thread *td) { int error; struct acl *aclp; aclp = acl_alloc(M_WAITOK); error = ufs_getacl_nfs4_internal(vp, aclp, td); /* * We don't have to handle EOPNOTSUPP here, as the filesystem claims * it supports ACLs. */ if (error) goto out; acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); error = ufs_setacl_nfs4_internal(vp, aclp, td); out: acl_free(aclp); return (error); } #endif /* UFS_ACL */ /* * Mark this file's access time for update for vfs_mark_atime(). This * is called from execve() and mmap(). */ static int ufs_markatime(ap) struct vop_markatime_args /* { struct vnode *a_vp; } */ *ap; { struct inode *ip = VTOI(ap->a_vp); UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS); /* * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. */ return (0); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, td) struct vnode *vp; int mode; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); int error; /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. Both of these are allowed in * jail(8). */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return (EFTYPE); } if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID); if (error) return (error); } /* * Deny setting setuid if we are not the file owner. 
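 *
 * (Editorial illustration, not part of the original file: absent
 * PRIV_VFS_ADMIN, a caller who is not the owner gets EPERM from, e.g.,
 *
 *	chmod("/home/other/tool", 04755);
 *
 * even when the caller has write access to the file.)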
*/ if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { error = priv_check_cred(cred, PRIV_VFS_ADMIN); if (error) return (error); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); DIP_SET(ip, i_mode, ip->i_mode); UFS_INODE_SET_FLAG(ip, IN_CHANGE); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); #endif if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) error = UFS_UPDATE(vp, 0); return (error); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(vp, uid, gid, cred, td) struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA int i; ufs2_daddr_t change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. */ if (((uid != ip->i_uid && uid != cred->cr_uid) || (gid != ip->i_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = DIP(ip, i_blocks); (void) chkdq(ip, -change, cred, CHOWN|FORCE); (void) chkiq(ip, -1, cred, CHOWN|FORCE); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; DIP_SET(ip, i_gid, gid); ip->i_uid = uid; DIP_SET(ip, i_uid, uid); #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; DIP_SET(ip, i_gid, ogid); ip->i_uid = ouid; DIP_SET(ip, i_uid, ouid); if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ UFS_INODE_SET_FLAG(ip, IN_CHANGE); if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { ip->i_mode &= ~(ISUID | ISGID); DIP_SET(ip, i_mode, ip->i_mode); } } error = UFS_UPDATE(vp, 0); return (error); } static int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; struct thread *td; td = curthread; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & 
APPEND)) { error = EPERM; goto out; } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); if (ip->i_nlink <= 0) vp->v_vflag |= VV_NOSYNC; if ((ip->i_flags & SF_SNAPSHOT) != 0) { /* * Avoid deadlock where another thread is trying to * update the inodeblock for dvp and is waiting on * snaplk. Temporarily unlock the vnode lock for the * unlinked file and sync the directory. This should * allow vput() of the directory to not block later on * while holding the snapshot vnode locked, assuming * that the directory hasn't been unlinked too. */ VOP_UNLOCK(vp); (void) VOP_FSYNC(dvp, MNT_WAIT, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } out: return (error); } static void print_bad_link_count(const char *funcname, struct vnode *dvp) { struct inode *dip; dip = VTOI(dvp); uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", funcname, dip->i_effnlink, (intmax_t)dip->i_number, dvp->v_mount->mnt_stat.f_mntonname); } /* * link vnode call */ static int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct inode *ip; struct direct newdir; int error; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (VTOI(tdvp)->i_effnlink < 2) { print_bad_link_count("ufs_link", tdvp); error = EINVAL; goto out; } ip = VTOI(vp); if (ip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } /* * The file may have been removed after namei dropped the original * lock. */ if (ip->i_effnlink == 0) { error = ENOENT; goto out; } if (ip->i_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } ip->i_effnlink++; ip->i_nlink++; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_setup_link(VTOI(tdvp), ip); error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp)); if (!error) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0); } if (error) { ip->i_effnlink--; ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_revert_link(VTOI(tdvp), ip); } out: return (error); } /* * whiteout vnode call */ static int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef INVARIANTS if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = UFS_WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0); break; case DELETE: /* remove an existing directory whiteout */ #ifdef INVARIANTS if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); } return (error); } static volatile int rename_restarts;
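/*
 * Editorial sketch (illustrative only, not part of the original file):
 * the counter above is exported read-only by the SYSCTL_INT() below, so
 * a hypothetical userland monitor could poll it as
 * "vfs.ufs.rename_restarts":
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int restarts;
 *		size_t len = sizeof(restarts);
 *
 *		if (sysctlbyname("vfs.ufs.rename_restarts", &restarts,
 *		    &len, NULL, 0) == -1)
 *			return (1);
 *		printf("rename restarts: %d\n", restarts);
 *		return (0);
 *	}
 */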
SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, __DEVOLATILE(int *, &rename_restarts), 0, "Times rename had to restart due to lock contention"); /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensures the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *nvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct thread *td = fcnp->cn_thread; struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; off_t endoff; int doingdirectory, newparent; int error = 0; struct mount *mp; ino_t ino; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif endoff = 0; mp = tdvp->v_mount; VOP_UNLOCK(tdvp); if (tvp && tvp != tdvp) VOP_UNLOCK(tvp); /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; mp = NULL; goto releout; } relock: /* * We need to acquire 2 to 4 locks depending on whether tvp is NULL * and fdvp and tdvp are the same directory. Subsequently we need * to double-check all paths and in the directory rename case we * need to verify that we are not creating a directory loop. To * handle this we acquire all but fdvp using non-blocking * acquisitions. If we fail to acquire any lock in the path we will * drop all held locks, acquire the new lock in a blocking fashion, * and then release it and restart the rename. This acquire/release * step ensures that we do not spin on a lock waiting for release. */ error = vn_lock(fdvp, LK_EXCLUSIVE); if (error) goto releout; if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { VOP_UNLOCK(fdvp); error = vn_lock(tdvp, LK_EXCLUSIVE); if (error) goto releout; VOP_UNLOCK(tdvp); atomic_add_int(&rename_restarts, 1); goto relock; } /* * Re-resolve fvp to be certain it still exists and fetch the * correct vnode. */ error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); goto releout; } error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; VOP_UNLOCK(nvp); vrele(fvp); fvp = nvp; atomic_add_int(&rename_restarts, 1); goto relock; } vrele(fvp); fvp = nvp; /* * Re-resolve tvp and acquire the vnode lock if present.
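 *
 * (Editorial sketch, not part of the original file: the retry discipline
 * described above boils down to
 *
 *	relock:
 *		lock(first);			// blocking acquire
 *		if (!trylock(second)) {		// non-blocking acquire
 *			unlock(first);
 *			lock(second);		// wait until it is free...
 *			unlock(second);		// ...but do not keep it
 *			rename_restarts++;
 *			goto relock;
 *		}
 *
 * so no thread ever sleeps on one vnode lock while holding another that
 * a competing rename may need.)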
*/ error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); if (error != 0 && error != EJUSTRETURN) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); goto releout; } /* * If tvp disappeared we just carry on. */ if (error == EJUSTRETURN && tvp != NULL) { vrele(tvp); tvp = NULL; } /* * Get the tvp ino if the lookup succeeded. We may have to restart * if the non-blocking acquire fails. */ if (error == 0) { nvp = NULL; error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (tvp) vrele(tvp); tvp = nvp; if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } } fdp = VTOI(fdvp); fip = VTOI(fvp); tdp = VTOI(tdvp); tip = NULL; if (tvp) tip = VTOI(tvp); if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto unlockout; } /* * Renaming a file to itself has no effect. The upper layers should * not call us in that case. However, things could change after * we drop the locks above. */ if (fvp == tvp) { error = 0; goto unlockout; } doingdirectory = 0; newparent = 0; ino = fip->i_number; if (fip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto unlockout; } if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdp->i_flags & APPEND)) { error = EPERM; goto unlockout; } if ((fip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || fdp == fip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; goto unlockout; } if (fdp->i_number != tdp->i_number) newparent = tdp->i_number; doingdirectory = 1; } if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || (tvp != NULL && tvp->v_type == VDIR && tvp->v_mountedhere != NULL)) { error = EXDEV; goto unlockout; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". */ if (doingdirectory && newparent) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); if (error) goto unlockout; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* * We encountered a lock that we have to wait for. Unlock * everything else and VGET before restarting. */ if (ino) { VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); if (tvp) VOP_UNLOCK(tvp); error = VFS_VGET(mp, ino, LK_SHARED, &nvp); if (error == 0) vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } if (error) goto unlockout; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); } if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || tdp->i_effnlink == 0) panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ fip->i_effnlink++; fip->i_nlink++; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_setup_link(tdp, fip); error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp)); if (error) goto bad; /* * 2) If target doesn't exist, link the target * to the source and unlink the source. 
* Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (tip == NULL) { if (ITODEV(tdp) != ITODEV(fip)) panic("ufs_rename: EXDEV"); if (doingdirectory && newparent) { /* * Account for ".." in new directory. * When source and destination have the same * parent we don't adjust the link count. The * actual link modification is completed when * .. is rewritten below. */ if (tdp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto bad; } } ufs_makedirentry(fip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1); if (error) goto bad; /* Setup tdvp for directory compaction if needed. */ if (tdp->i_count && tdp->i_endoff && tdp->i_endoff < tdp->i_size) endoff = tdp->i_endoff; } else { if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (tip->i_number == fip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the caller * must possess VADMIN for the parent directory, or the * destination of the rename. This implements append-only * directories. */ if ((tdp->i_mode & S_ISTXT) && VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((tip->i_mode & IFMT) == IFDIR) { if ((tip->i_effnlink > 2) || !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } if (doingdirectory) { if (!newparent) { tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink--; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } error = ufs_dirrewrite(tdp, tip, fip->i_number, IFTODT(fip->i_mode), (doingdirectory && newparent) ? newparent : doingdirectory); if (error) { if (doingdirectory) { if (!newparent) { tdp->i_effnlink++; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink++; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * The only stuff left in the directory is "." * and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ if (!newparent) { tdp->i_nlink--; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); } tip->i_nlink--; DIP_SET(tip, i_nlink, tip->i_nlink); UFS_INODE_SET_FLAG(tip, IN_CHANGE); } } /* * 3) Unlink the source. We have to resolve the path again to * fixup the directory offset and count for ufs_dirremove. */ if (fdvp == tdvp) { error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) panic("ufs_rename: from entry went away!"); if (ino != fip->i_number) panic("ufs_rename: ino mismatch %ju != %ju\n", (uintmax_t)ino, (uintmax_t)fip->i_number); } /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. 
*/ if (doingdirectory && newparent) { /* * If tip exists we simply use its link, otherwise we must * add a new one. */ if (tip == NULL) { tdp->i_effnlink++; tdp->i_nlink++; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); if (DOINGSOFTDEP(tdvp)) softdep_setup_dotdot_link(tdp, fip); error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) && !DOINGASYNC(tdvp)); /* Don't go to bad here as the new link exists. */ if (error) goto unlockout; } else if (DOINGSUJ(tdvp)) /* Journal must account for each new link. */ softdep_setup_dotdot_link(tdp, fip); fip->i_offset = mastertemplate.dot_reclen; ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); /* * The kern_renameat() looks up the fvp using the DELETE flag, which * causes the removal of the name cache entry for fvp. * As the relookup of the fvp is done in two steps: * ufs_lookup_ino() and then VFS_VGET(), another thread might do a * normal lookup of the from name just before the VFS_VGET() call, * causing the cache entry to be re-instantiated. * * The same issue also applies to tvp if it exists as * otherwise we may have a stale name cache entry for the new * name that references the old i-node if it has other links * or open file descriptors. */ cache_purge(fvp); if (tvp) cache_purge(tvp); cache_purge_negative(tdvp); unlockout: vput(fdvp); vput(fvp); if (tvp) vput(tvp); /* * If compaction or fsync was requested do it now that other locks * are no longer needed. */ if (error == 0 && endoff != 0) { error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred); if (error != 0) vn_printf(tdvp, "ufs_rename: failed to truncate, error %d\n", error); #ifdef UFS_DIRHASH else if (tdp->i_dirhash != NULL) ufsdirhash_dirtrunc(tdp, endoff); #endif /* * Even if the directory compaction failed, rename was * successful. Do not propagate a UFS_TRUNCATE() error * to the caller. */ error = 0; } if (error == 0 && tdp->i_flag & IN_NEEDSYNC) error = VOP_FSYNC(tdvp, MNT_WAIT, td); vput(tdvp); return (error); bad: fip->i_effnlink--; fip->i_nlink--; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_revert_link(tdp, fip); goto unlockout; releout: vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp) vrele(tvp); return (error); } #ifdef UFS_ACL static int ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, mode_t dmode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *dacl, *acl; acl = acl_alloc(M_WAITOK); dacl = acl_alloc(M_WAITOK); /* * Retrieve default ACL from parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. If the ACL is empty, fall through to * the "not defined or available" case. */ if (acl->acl_cnt != 0) { dmode = acl_posix1e_newfilemode(dmode, acl); ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); *dacl = *acl; ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released?
*/ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); if (error == 0) error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above * was supposed to free acl. */ printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); /* panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); acl_free(dacl); return (error); } static int ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, mode_t mode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *acl; acl = acl_alloc(M_WAITOK); /* * Retrieve default ACL for parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. */ if (acl->acl_cnt != 0) { /* * Two possible ways for default ACL to not * be present. First, the EA can be * undefined, or second, the default ACL can * be blank. If it's blank, fall through to * the "not defined" case. */ mode = acl_posix1e_newfilemode(mode, acl); ip->i_mode = mode; DIP_SET(ip, i_mode, mode); ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ ip->i_mode = mode; DIP_SET(ip, i_mode, mode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released? */ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above was * supposed to free acl. */ printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()\n"); /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); return (error); } static int ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, mode_t child_mode, struct ucred *cred, struct thread *td) { int error; struct acl *parent_aclp, *child_aclp; parent_aclp = acl_alloc(M_WAITOK); child_aclp = acl_alloc(M_WAITOK | M_ZERO); error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); if (error) goto out; acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); if (error) goto out; out: acl_free(parent_aclp); acl_free(child_aclp); return (error); } #endif /* * Mkdir system call */ static int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct dirtemplate dirtemplate, *dtp; struct direct newdir; int error, dmode; long blkoff; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if (dp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries.
*/ if (dp->i_effnlink < 2) { print_bad_link_count("ufs_mkdir", dvp); error = EINVAL; goto out; } error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; DIP_SET(ip, i_gid, dp->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; DIP_SET(ip, i_uid, dp->i_uid); #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); + vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); + vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is created so cleanup is * possible if we crash. */ dp->i_effnlink++; dp->i_nlink++; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); if (DOINGSOFTDEP(dvp)) softdep_setup_mkdir(dp, ip); error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ /* * Initialize directory with "." and ".." from static template. 
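 *
 * (Editorial aside, not part of the original file: the template expands
 * to two struct direct records covering the first DIRBLKSIZ block,
 * roughly
 *
 *	{ d_ino = ip->i_number, d_reclen = 12, DT_DIR, "." }
 *	{ d_ino = dp->i_number, d_reclen = DIRBLKSIZ - 12, DT_DIR, ".." }
 *
 * so the new block is covered end to end by valid entries.)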
*/ if (dvp->v_mount->mnt_maxsymlinklen > 0) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; vnode_pager_setsize(tvp, DIRBLKSIZ); if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, BA_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; DIP_SET(ip, i_size, DIRBLKSIZ); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff = DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } } if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp))) != 0) { (void)bwrite(bp); goto bad; } /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. */ if (DOINGASYNC(dvp)) bdwrite(bp); else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); bad: if (error == 0) { *ap->a_vpp = tvp; } else { dp->i_effnlink--; dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_mkdir(dp, ip); - + vgone(tvp); vput(tvp); } out: return (error); } /* * Rmdir system call. */ static int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. Do not allow the removal of mounted on * directories (this can happen when an NFS exported filesystem * tries to remove a locally mounted on directory). */ error = 0; if (dp->i_effnlink <= 2) { if (dp->i_effnlink == 2) print_bad_link_count("ufs_rmdir", dvp); error = EINVAL; goto out; } if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } if (vp->v_mountedhere != 0) { error = EINVAL; goto out; } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif /* * Delete reference to directory before purging * inode. 
If we crash in between, the directory * will be reattached to lost+found. */ dp->i_effnlink--; ip->i_effnlink--; if (DOINGSOFTDEP(vp)) softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; if (DOINGSOFTDEP(vp)) softdep_revert_rmdir(dp, ip); goto out; } cache_purge(dvp); /* * The only stuff left in the directory is "." and "..". The "." * reference is inconsequential since we are quashing it. The soft * dependency code will arrange to do these operations after * the parent directory entry has been deleted on disk, so * when running with that code we avoid doing them now. */ if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); error = UFS_UPDATE(dvp, 0); ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); } cache_purge(vp); #ifdef UFS_DIRHASH /* Kill any active hash; i_effnlink == 0, so it will not come back. */ if (ip->i_dirhash != NULL) ufsdirhash_free(ip); #endif out: return (error); } /* * symlink -- make a symbolic link */ static int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; const char *a_target; } */ *ap; { struct vnode *vp, **vpp = ap->a_vpp; struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp, "ufs_symlink"); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, SHORTLINK(ip), len); ip->i_size = len; DIP_SET(ip, i_size, len); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); error = UFS_UPDATE(vp, 0); } else error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target), len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, ap->a_cnp->cn_cred, NOCRED, NULL, NULL); if (error) vput(vp); return (error); } /* * Vnode op for reading directories.
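 *
 * (Editorial sketch, not part of the original file: the userland view of
 * this vop is opendir(3)/readdir(3), e.g.
 *
 *	DIR *d = opendir("/mnt");
 *	struct dirent *de;
 *
 *	while (d != NULL && (de = readdir(d)) != NULL)
 *		printf("%ju %s\n", (uintmax_t)de->d_fileno, de->d_name);
 *
 * where each batch of entries is produced by the uiomove() loop below.)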
*/ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct buf *bp; struct inode *ip; struct direct *dp, *edp; u_long *cookies; struct dirent dstdp; off_t offset, startoffset; size_t readcnt, skipcnt; ssize_t startresid; u_int ncookies; int error; if (uio->uio_offset < 0) return (EINVAL); ip = VTOI(vp); if (ip->i_effnlink == 0) return (0); if (ap->a_ncookies != NULL) { if (uio->uio_resid < 0) ncookies = 0; else ncookies = uio->uio_resid; if (uio->uio_offset >= ip->i_size) ncookies = 0; else if (ip->i_size - uio->uio_offset < ncookies) ncookies = ip->i_size - uio->uio_offset; ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } else { ncookies = 0; cookies = NULL; } offset = startoffset = uio->uio_offset; startresid = uio->uio_resid; error = 0; while (error == 0 && uio->uio_resid > 0 && uio->uio_offset < ip->i_size) { error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp); if (error) break; if (bp->b_offset + bp->b_bcount > ip->i_size) readcnt = ip->i_size - bp->b_offset; else readcnt = bp->b_bcount; skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & ~(size_t)(DIRBLKSIZ - 1); offset = bp->b_offset + skipcnt; dp = (struct direct *)&bp->b_data[skipcnt]; edp = (struct direct *)&bp->b_data[readcnt]; while (error == 0 && uio->uio_resid > 0 && dp < edp) { if (dp->d_reclen <= offsetof(struct direct, d_name) || (caddr_t)dp + dp->d_reclen > (caddr_t)edp) { error = EIO; break; } #if BYTE_ORDER == LITTLE_ENDIAN /* Old filesystem format. */ if (vp->v_mount->mnt_maxsymlinklen <= 0) { dstdp.d_namlen = dp->d_type; dstdp.d_type = dp->d_namlen; } else #endif { dstdp.d_namlen = dp->d_namlen; dstdp.d_type = dp->d_type; } if (offsetof(struct direct, d_name) + dstdp.d_namlen > dp->d_reclen) { error = EIO; break; } if (offset < startoffset || dp->d_ino == 0) goto nextentry; dstdp.d_fileno = dp->d_ino; dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); /* NOTE: d_off is the offset of the *next* entry. */ dstdp.d_off = offset + dp->d_reclen; dirent_terminate(&dstdp); if (dstdp.d_reclen > uio->uio_resid) { if (uio->uio_resid == startresid) error = EINVAL; else error = EJUSTRETURN; break; } /* Advance dp. */ error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); if (error) break; if (cookies != NULL) { KASSERT(ncookies > 0, ("ufs_readdir: cookies buffer too small")); *cookies = offset + dp->d_reclen; cookies++; ncookies--; } nextentry: offset += dp->d_reclen; dp = (struct direct *)((caddr_t)dp + dp->d_reclen); } bqrelse(bp); uio->uio_offset = offset; } /* We need to correct uio_offset. 
*/ uio->uio_offset = offset; if (error == EJUSTRETURN) error = 0; if (ap->a_ncookies != NULL) { if (error == 0) { *ap->a_ncookies -= ncookies; } else { free(*ap->a_cookies, M_TEMP); *ap->a_ncookies = 0; *ap->a_cookies = NULL; } } if (error == 0 && ap->a_eofflag) *ap->a_eofflag = ip->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ static int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); doff_t isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */ return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. * * In order to be able to swap to a file, the ufs_bmaparray() operation may not * deadlock on memory. See ufs_bmap() for details. */ static int ufs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; ufs2_daddr_t blkno; int error; if (bp->b_blkno == bp->b_lblkno) { error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { bufdone(bp); return (0); } bp->b_iooffset = dbtob(bp->b_blkno); BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); return (0); } /* * Print out the contents of an inode. */ static int ufs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink, ip->i_effnlink, (intmax_t)ip->i_size); if (I_IS_UFS2(ip)) printf(", extsize %d", ip->i_din2->di_extsize); printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n", (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid, (u_int)ip->i_flags, PRINT_INODE_FLAGS); printf("\tino %lu, on dev %s", (u_long)ip->i_number, devtoname(ITODEV(ip))); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ static int ufsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (fifo_specops.vop_close(ap)); } /* * Kqfilter wrapper for fifos. * * Fall through to ufs kqfilter routines if needed */ static int ufsfifo_kqfilter(ap) struct vop_kqfilter_args *ap; { int error; error = fifo_specops.vop_kqfilter(ap); if (error) error = vfs_kqfilter(ap); return (error); } /* * Return POSIX pathconf information applicable to ufs filesystems.
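 *
 * (Editorial sketch, not part of the original file: these values reach
 * userland through pathconf(2)/fpathconf(2), e.g.
 *
 *	long name_max = pathconf("/mnt", _PC_NAME_MAX);
 *
 * which for a UFS mount reports UFS_MAXNAMLEN per the case below.)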
*/ static int ufs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { int error; error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = UFS_LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = UFS_MAXNAMLEN; break; case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) *ap->a_retval = PIPE_BUF; else error = EINVAL; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; #ifdef UFS_ACL case _PC_ACL_EXTENDED: if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_ACL_PATH_MAX: #ifdef UFS_ACL if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; #else *ap->a_retval = 3; #endif break; #ifdef MAC case _PC_MAC_PRESENT: if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_MIN_HOLE_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: *ap->a_retval = 64; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; default: error = vop_stdpathconf(ap); break; } return (error); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ufs_vinit(mntp, fifoops, vpp) struct mount *mntp; struct vop_vector *fifoops; struct vnode **vpp; { struct inode *ip; struct vnode *vp; vp = *vpp; ASSERT_VOP_LOCKED(vp, "ufs_vinit"); ip = VTOI(vp); vp->v_type = IFTOVT(ip->i_mode); /* * Only unallocated inodes should be of type VNON. */ if (ip->i_mode != 0 && vp->v_type == VNON) return (EINVAL); if (vp->v_type == VFIFO) vp->v_op = fifoops; if (ip->i_number == UFS_ROOTINO) vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Allocate a new inode. * Vnode dvp must be locked. */ static int ufs_makeinode(mode, dvp, vpp, cnp, callfunc) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; const char *callfunc; { struct inode *ip, *pdir; struct direct newdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("%s: no name", callfunc); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; if (pdir->i_effnlink < 2) { print_bad_link_count(callfunc, dvp); return (EINVAL); } error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); ip = VTOI(tvp); ip->i_gid = pdir->i_gid; DIP_SET(ip, i_gid, pdir->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. 
*/ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; DIP_SET(ip, i_uid, ip->i_uid); mode &= ~07111; #ifdef QUOTA /* * Make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); + vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); + vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); ip->i_mode = mode; DIP_SET(ip, i_mode, mode); tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) { ip->i_mode &= ~ISGID; DIP_SET(ip, i_mode, ip->i_mode); } if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Make sure inode goes to disk before directory entry. */ error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); if (error) goto bad; *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_create(VTOI(dvp), ip); + vgone(tvp); vput(tvp); return (error); } static int ufs_ioctl(struct vop_ioctl_args *ap) { struct vnode *vp; int error; vp = ap->a_vp; switch (ap->a_command) { case FIOSEEKDATA: error = vn_lock(vp, LK_SHARED); if (error == 0) { error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data); VOP_UNLOCK(vp); } else error = EBADF; return (error); case FIOSEEKHOLE: return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data, ap->a_cred)); default: return (ENOTTY); } } /* Global vfs data structures for ufs. 
*/ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, .vop_fsync = VOP_PANIC, .vop_read = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_write = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_bmap = ufs_bmap, .vop_cachedlookup = ufs_lookup, .vop_close = ufs_close, .vop_create = ufs_create, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_ioctl = ufs_ioctl, .vop_link = ufs_link, .vop_lookup = vfs_cache_lookup, .vop_markatime = ufs_markatime, .vop_mkdir = ufs_mkdir, .vop_mknod = ufs_mknod, .vop_need_inactive = ufs_need_inactive, .vop_open = ufs_open, .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim, .vop_remove = ufs_remove, .vop_rename = ufs_rename, .vop_rmdir = ufs_rmdir, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_strategy = ufs_strategy, .vop_symlink = ufs_symlink, .vop_whiteout = ufs_whiteout, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_vnodeops); struct vop_vector ufs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_close = ufsfifo_close, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_kqfilter = ufsfifo_kqfilter, .vop_markatime = ufs_markatime, .vop_pathconf = ufs_pathconf, .vop_print = ufs_print, .vop_read = VOP_PANIC, .vop_reclaim = ufs_reclaim, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_write = VOP_PANIC, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_fifoops); Index: projects/clang1000-import/sys/x86/cpufreq/hwpstate_amd.c =================================================================== --- projects/clang1000-import/sys/x86/cpufreq/hwpstate_amd.c (revision 357178) +++ projects/clang1000-import/sys/x86/cpufreq/hwpstate_amd.c (revision 357179) @@ -1,543 +1,547 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Nate Lawson * Copyright (c) 2004 Colin Percival * Copyright (c) 2004-2005 Bruno Durcot * Copyright (c) 2004 FUKUDA Nobuhiko * Copyright (c) 2009 Michael Reifenberger * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2008-2009 Gen Otsuji * * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c * in various parts. The authors of these files are Nate Lawson, * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko. * This code contains patches by Michael Reifenberger and Norikatsu Shigemura. * Thank you. * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * For more info: * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors * 31116 Rev 3.20 February 04, 2009 * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors * 41256 Rev 3.00 - July 07, 2008 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "cpufreq_if.h" #define MSR_AMD_10H_11H_LIMIT 0xc0010061 #define MSR_AMD_10H_11H_CONTROL 0xc0010062 #define MSR_AMD_10H_11H_STATUS 0xc0010063 #define MSR_AMD_10H_11H_CONFIG 0xc0010064 #define AMD_10H_11H_MAX_STATES 16 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */ #define AMD_10H_11H_GET_PSTATE_MAX_VAL(msr) (((msr) >> 4) & 0x7) #define AMD_10H_11H_GET_PSTATE_LIMIT(msr) (((msr)) & 0x7) /* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */ #define AMD_10H_11H_CUR_VID(msr) (((msr) >> 9) & 0x7F) #define AMD_10H_11H_CUR_DID(msr) (((msr) >> 6) & 0x07) #define AMD_10H_11H_CUR_FID(msr) ((msr) & 0x3F) #define AMD_17H_CUR_VID(msr) (((msr) >> 14) & 0xFF) #define AMD_17H_CUR_DID(msr) (((msr) >> 8) & 0x3F) #define AMD_17H_CUR_FID(msr) ((msr) & 0xFF) #define HWPSTATE_DEBUG(dev, msg...) \ do { \ if (hwpstate_verbose) \ device_printf(dev, msg); \ } while (0) struct hwpstate_setting { int freq; /* CPU clock in Mhz or 100ths of a percent. */ int volts; /* Voltage in mV. */ int power; /* Power consumed in mW. */ int lat; /* Transition latency in us. 
*/ int pstate_id; /* P-State id */ }; struct hwpstate_softc { device_t dev; struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES]; int cfnum; }; static void hwpstate_identify(driver_t *driver, device_t parent); static int hwpstate_probe(device_t dev); static int hwpstate_attach(device_t dev); static int hwpstate_detach(device_t dev); static int hwpstate_set(device_t dev, const struct cf_setting *cf); static int hwpstate_get(device_t dev, struct cf_setting *cf); static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count); static int hwpstate_type(device_t dev, int *type); static int hwpstate_shutdown(device_t dev); static int hwpstate_features(driver_t *driver, u_int *features); static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev); static int hwpstate_get_info_from_msr(device_t dev); static int hwpstate_goto_pstate(device_t dev, int pstate_id); static int hwpstate_verbose; SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RWTUN, &hwpstate_verbose, 0, "Debug hwpstate"); static int hwpstate_verify; SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN, &hwpstate_verify, 0, "Verify P-state after setting"); static device_method_t hwpstate_methods[] = { /* Device interface */ DEVMETHOD(device_identify, hwpstate_identify), DEVMETHOD(device_probe, hwpstate_probe), DEVMETHOD(device_attach, hwpstate_attach), DEVMETHOD(device_detach, hwpstate_detach), DEVMETHOD(device_shutdown, hwpstate_shutdown), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, hwpstate_set), DEVMETHOD(cpufreq_drv_get, hwpstate_get), DEVMETHOD(cpufreq_drv_settings, hwpstate_settings), DEVMETHOD(cpufreq_drv_type, hwpstate_type), /* ACPI interface */ DEVMETHOD(acpi_get_features, hwpstate_features), {0, 0} }; static devclass_t hwpstate_devclass; static driver_t hwpstate_driver = { "hwpstate", hwpstate_methods, sizeof(struct hwpstate_softc), }; DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0); /* * Go to Px-state on all cpus considering the limit. */ static int hwpstate_goto_pstate(device_t dev, int id) { sbintime_t sbt; uint64_t msr; int cpu, i, j, limit; /* get the current pstate limit */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr); - if (limit > id) + if (limit > id) { + HWPSTATE_DEBUG(dev, + "Restricting requested P%d to P%d due to HW limit\n", id, + limit); id = limit; + } cpu = curcpu; HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu); /* Go To Px-state */ wrmsr(MSR_AMD_10H_11H_CONTROL, id); /* * We are going to the same Px-state on all cpus. * Probably should take _PSD into account. */ CPU_FOREACH(i) { if (i == cpu) continue; /* Bind to each cpu. */ thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i); /* Go To Px-state */ wrmsr(MSR_AMD_10H_11H_CONTROL, id); } /* * Verify whether each core is in the requested P-state. */ if (hwpstate_verify) { CPU_FOREACH(i) { thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); /* wait loop (100*100 usec is enough ?) */ for (j = 0; j < 100; j++) { /* get the result. 
not assure msr=id */ msr = rdmsr(MSR_AMD_10H_11H_STATUS); if (msr == id) break; sbt = SBT_1MS / 10; tsleep_sbt(dev, PZERO, "pstate_goto", sbt, sbt >> tc_precexp, 0); } HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr, i); if (msr != id) { HWPSTATE_DEBUG(dev, "error: loop is not enough.\n"); return (ENXIO); } } } return (0); } static int hwpstate_set(device_t dev, const struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting *set; int i; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) if (CPUFREQ_CMP(cf->freq, set[i].freq)) break; if (i == sc->cfnum) return (EINVAL); return (hwpstate_goto_pstate(dev, set[i].pstate_id)); } static int hwpstate_get(device_t dev, struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting set; uint64_t msr; sc = device_get_softc(dev); if (cf == NULL) return (EINVAL); msr = rdmsr(MSR_AMD_10H_11H_STATUS); if (msr >= sc->cfnum) return (EINVAL); set = sc->hwpstate_settings[msr]; cf->freq = set.freq; cf->volts = set.volts; cf->power = set.power; cf->lat = set.lat; cf->dev = dev; return (0); } static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count) { struct hwpstate_softc *sc; struct hwpstate_setting set; int i; if (sets == NULL || count == NULL) return (EINVAL); sc = device_get_softc(dev); if (*count < sc->cfnum) return (E2BIG); for (i = 0; i < sc->cfnum; i++, sets++) { set = sc->hwpstate_settings[i]; sets->freq = set.freq; sets->volts = set.volts; sets->power = set.power; sets->lat = set.lat; sets->dev = dev; } *count = sc->cfnum; return (0); } static int hwpstate_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } static void hwpstate_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "hwpstate", -1) != NULL) return; if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) && cpu_vendor_id != CPU_VENDOR_HYGON) return; /* * Check if hardware pstate enable bit is set. */ if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) { HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n"); return; } if (resource_disabled("hwpstate", 0)) return; if (BUS_ADD_CHILD(parent, 10, "hwpstate", -1) == NULL) device_printf(parent, "hwpstate: add child failed\n"); } static int hwpstate_probe(device_t dev) { struct hwpstate_softc *sc; device_t perf_dev; uint64_t msr; int error, type; /* * Only hwpstate0. * It goes well with acpi_throttle. */ if (device_get_unit(dev) != 0) return (ENXIO); sc = device_get_softc(dev); sc->dev = dev; /* * Check if acpi_perf has INFO only flag. */ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); error = TRUE; if (perf_dev && device_is_attached(perf_dev)) { error = CPUFREQ_DRV_TYPE(perf_dev, &type); if (error == 0) { if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) { /* * If acpi_perf doesn't have INFO_ONLY flag, * it will take care of pstate transitions. */ HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n"); return (ENXIO); } else { /* * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW) * we can get _PSS info from acpi_perf * without going into ACPI. */ HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n"); error = hwpstate_get_info_from_acpi_perf(dev, perf_dev); } } } if (error == 0) { /* * Now we get _PSS info from acpi_perf without error. * Let's check it. 
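MSR C001_0061 reports the highest valid P-state index in PstateMaxVal (bits 6:4), so the hardware P-state count is that value plus one; if it disagrees with the _PSS count, the ACPI data is discarded and the driver falls back to reading the MSRs directly.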
*/ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) { HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)" " count mismatch\n", (intmax_t)msr, sc->cfnum); error = TRUE; } } /* * If we cannot get info from acpi_perf, * Let's get info from MSRs. */ if (error) error = hwpstate_get_info_from_msr(dev); if (error) return (error); device_set_desc(dev, "Cool`n'Quiet 2.0"); return (0); } static int hwpstate_attach(device_t dev) { return (cpufreq_register(dev)); } static int hwpstate_get_info_from_msr(device_t dev) { struct hwpstate_softc *sc; struct hwpstate_setting *hwpstate_set; uint64_t msr; int family, i, fid, did; family = CPUID_TO_FAMILY(cpu_id); sc = device_get_softc(dev); /* Get pstate count */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr); hwpstate_set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) { msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i); if ((msr & ((uint64_t)1 << 63)) == 0) { HWPSTATE_DEBUG(dev, "msr is not valid.\n"); return (ENXIO); } did = AMD_10H_11H_CUR_DID(msr); fid = AMD_10H_11H_CUR_FID(msr); /* Convert fid/did to frequency. */ switch (family) { case 0x11: hwpstate_set[i].freq = (100 * (fid + 0x08)) >> did; break; case 0x10: case 0x12: case 0x15: case 0x16: hwpstate_set[i].freq = (100 * (fid + 0x10)) >> did; break; case 0x17: case 0x18: did = AMD_17H_CUR_DID(msr); if (did == 0) { HWPSTATE_DEBUG(dev, "unexpected did: 0\n"); did = 1; } fid = AMD_17H_CUR_FID(msr); hwpstate_set[i].freq = (200 * fid) / did; break; default: HWPSTATE_DEBUG(dev, "get_info_from_msr: %s family" " 0x%02x CPUs are not supported yet\n", cpu_vendor_id == CPU_VENDOR_HYGON ? "Hygon" : "AMD", family); return (ENXIO); } hwpstate_set[i].pstate_id = i; /* There was volts calculation, but deleted it. */ hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN; } return (0); } static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev) { struct hwpstate_softc *sc; struct cf_setting *perf_set; struct hwpstate_setting *hwpstate_set; int count, error, i; perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT); if (perf_set == NULL) { HWPSTATE_DEBUG(dev, "nomem\n"); return (ENOMEM); } /* * Fetch settings from acpi_perf. * Now it is attached, and has info only flag. 
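The settings arrive through the generic cpufreq driver interface (CPUFREQ_DRV_SETTINGS); each entry carries its P-state id in spec[0], which is cross-checked against the table index below.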
*/ count = MAX_SETTINGS; error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count); if (error) { HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n"); goto out; } sc = device_get_softc(dev); sc->cfnum = count; hwpstate_set = sc->hwpstate_settings; for (i = 0; i < count; i++) { if (i == perf_set[i].spec[0]) { hwpstate_set[i].pstate_id = i; hwpstate_set[i].freq = perf_set[i].freq; hwpstate_set[i].volts = perf_set[i].volts; hwpstate_set[i].power = perf_set[i].power; hwpstate_set[i].lat = perf_set[i].lat; } else { HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n"); error = ENXIO; goto out; } } out: if (perf_set) free(perf_set, M_TEMP); return (error); } static int hwpstate_detach(device_t dev) { hwpstate_goto_pstate(dev, 0); return (cpufreq_unregister(dev)); } static int hwpstate_shutdown(device_t dev) { /* hwpstate_goto_pstate(dev, 0); */ return (0); } static int hwpstate_features(driver_t *driver, u_int *features) { /* Notify the ACPI CPU that we support direct access to MSRs */ *features = ACPI_CAP_PERF_MSRS; return (0); } Index: projects/clang1000-import/sys/x86/iommu/intel_dmar.h =================================================================== --- projects/clang1000-import/sys/x86/iommu/intel_dmar.h (revision 357178) +++ projects/clang1000-import/sys/x86/iommu/intel_dmar.h (revision 357179) @@ -1,575 +1,576 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_IOMMU_INTEL_DMAR_H #define __X86_IOMMU_INTEL_DMAR_H /* Host or physical memory address, after translation. */ typedef uint64_t dmar_haddr_t; /* Guest or bus address, before translation. 
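This is the address a device puts on the bus for DMA; the DMAR unit translates it through the domain page tables into a host physical (dmar_haddr_t) address.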
*/ typedef uint64_t dmar_gaddr_t; struct dmar_qi_genseq { u_int gen; uint32_t seq; }; struct dmar_map_entry { dmar_gaddr_t start; dmar_gaddr_t end; - dmar_gaddr_t free_after; /* Free space after the entry */ + dmar_gaddr_t first; /* Least start in subtree */ + dmar_gaddr_t last; /* Greatest end in subtree */ dmar_gaddr_t free_down; /* Max free space below the current R/B tree node */ u_int flags; TAILQ_ENTRY(dmar_map_entry) dmamap_link; /* Link for dmamap entries */ RB_ENTRY(dmar_map_entry) rb_entry; /* Links for domain entries */ TAILQ_ENTRY(dmar_map_entry) unroll_link; /* Link for unroll after dmamap_load failure */ struct dmar_domain *domain; struct dmar_qi_genseq gseq; }; RB_HEAD(dmar_gas_entries_tree, dmar_map_entry); RB_PROTOTYPE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, dmar_gas_cmp_entries); #define DMAR_MAP_ENTRY_PLACE 0x0001 /* Fake entry */ #define DMAR_MAP_ENTRY_RMRR 0x0002 /* Permanent, not linked by dmamap_link */ #define DMAR_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by dmamap_link */ #define DMAR_MAP_ENTRY_UNMAPPED 0x0010 /* No backing pages */ #define DMAR_MAP_ENTRY_QI_NF 0x0020 /* qi task, do not free entry */ #define DMAR_MAP_ENTRY_READ 0x1000 /* Read permitted */ #define DMAR_MAP_ENTRY_WRITE 0x2000 /* Write permitted */ #define DMAR_MAP_ENTRY_SNOOP 0x4000 /* Snoop */ #define DMAR_MAP_ENTRY_TM 0x8000 /* Transient */ /* * Locking annotations: * (u) - Protected by dmar unit lock * (d) - Protected by domain lock * (c) - Immutable after initialization */ /* * The domain abstraction. Most non-constant members of the domain * are protected by owning dmar unit lock, not by the domain lock. * Most important, the dmar lock protects the contexts list. * * The domain lock protects the address map for the domain, and list * of unload entries delayed. * * Page tables pages and pages content is protected by the vm object * lock pgtbl_obj, which contains the page tables pages. 
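Fields tagged (c) below are immutable after initialization and may be read without taking any lock.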
*/ struct dmar_domain { int domain; /* (c) DID, written in context entry */ int mgaw; /* (c) Real max address width */ int agaw; /* (c) Adjusted guest address width */ int pglvl; /* (c) The pagelevel */ int awlvl; /* (c) The pagelevel as the bitmask, to set in context entry */ dmar_gaddr_t end; /* (c) Highest address + 1 in the guest AS */ u_int ctx_cnt; /* (u) Number of contexts owned */ u_int refs; /* (u) Refs, including ctx */ struct dmar_unit *dmar; /* (c) */ struct mtx lock; /* (c) */ LIST_ENTRY(dmar_domain) link; /* (u) Member in the dmar list */ LIST_HEAD(, dmar_ctx) contexts; /* (u) */ vm_object_t pgtbl_obj; /* (c) Page table pages */ u_int flags; /* (u) */ u_int entries_cnt; /* (d) */ struct dmar_gas_entries_tree rb_root; /* (d) */ struct dmar_map_entries_tailq unload_entries; /* (d) Entries to unload */ struct dmar_map_entry *first_place, *last_place; /* (d) */ struct task unload_task; /* (c) */ u_int batch_no; }; struct dmar_ctx { struct bus_dma_tag_dmar ctx_tag; /* (c) Root tag */ uint16_t rid; /* (c) pci RID */ uint64_t last_fault_rec[2]; /* Last fault reported */ struct dmar_domain *domain; /* (c) */ LIST_ENTRY(dmar_ctx) link; /* (u) Member in the domain list */ u_int refs; /* (u) References from tags */ u_int flags; /* (u) */ u_long loads; /* atomic updates, for stat only */ u_long unloads; /* same */ }; #define DMAR_DOMAIN_GAS_INITED 0x0001 #define DMAR_DOMAIN_PGTBL_INITED 0x0002 #define DMAR_DOMAIN_IDMAP 0x0010 /* Domain uses identity page table */ #define DMAR_DOMAIN_RMRR 0x0020 /* Domain contains RMRR entry, cannot be turned off */ /* struct dmar_ctx flags */ #define DMAR_CTX_FAULTED 0x0001 /* Fault was reported, last_fault_rec is valid */ #define DMAR_CTX_DISABLED 0x0002 /* Device is disabled, the ephemeral reference is kept to prevent context destruction */ #define DMAR_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_ASSERT_PGLOCKED(dom) \ VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj) #define DMAR_DOMAIN_LOCK(dom) mtx_lock(&(dom)->lock) #define DMAR_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->lock) #define DMAR_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->lock, MA_OWNED) struct dmar_msi_data { int irq; int irq_rid; struct resource *irq_res; void *intr_handle; int (*handler)(void *); int msi_data_reg; int msi_addr_reg; int msi_uaddr_reg; void (*enable_intr)(struct dmar_unit *); void (*disable_intr)(struct dmar_unit *); const char *name; }; #define DMAR_INTR_FAULT 0 #define DMAR_INTR_QI 1 #define DMAR_INTR_TOTAL 2 struct dmar_unit { device_t dev; int unit; uint16_t segment; uint64_t base; /* Resources */ int reg_rid; struct resource *regs; struct dmar_msi_data intrs[DMAR_INTR_TOTAL]; /* Hardware registers cache */ uint32_t hw_ver; uint64_t hw_cap; uint64_t hw_ecap; uint32_t hw_gcmd; /* Data for being a dmar */ struct mtx lock; LIST_HEAD(, dmar_domain) domains; struct unrhdr *domids; vm_object_t ctx_obj; u_int barrier_flags; /* Fault handler data */ struct mtx fault_lock; uint64_t *fault_log; int fault_log_head; int fault_log_tail; int fault_log_size; struct task fault_task; struct taskqueue *fault_taskqueue; /* QI */ int qi_enabled; vm_offset_t inv_queue; vm_size_t inv_queue_size; uint32_t inv_queue_avail; uint32_t inv_queue_tail; volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait descr completion */ uint64_t inv_waitd_seq_hw_phys; uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */ 
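/* A wait descriptor stamped with the next sequence number can be queued behind a batch of invalidation descriptors; the hardware writes that number to inv_waitd_seq_hw when the batch has drained, which is what inv_seq_waiters sleep on. */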
u_int inv_waitd_gen; /* seq number generation AKA seq overflows */ u_int inv_seq_waiters; /* count of waiters for seq */ u_int inv_queue_full; /* informational counter */ /* IR */ int ir_enabled; vm_paddr_t irt_phys; dmar_irte_t *irt; u_int irte_cnt; vmem_t *irtids; /* Delayed freeing of map entries queue processing */ struct dmar_map_entries_tailq tlb_flush_entries; struct task qi_task; struct taskqueue *qi_taskqueue; /* Busdma delayed map load */ struct task dmamap_load_task; TAILQ_HEAD(, bus_dmamap_dmar) delayed_maps; struct taskqueue *delayed_taskqueue; int dma_enabled; /* * Bitmap of buses for which context must ignore slot:func, * duplicating the page table pointer into all context table * entries. This is a client-controlled quirk to support some * NTBs. */ uint32_t buswide_ctxs[(PCI_BUSMAX + 1) / NBBY / sizeof(uint32_t)]; }; #define DMAR_LOCK(dmar) mtx_lock(&(dmar)->lock) #define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->lock) #define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->lock, MA_OWNED) #define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock) #define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock) #define DMAR_FAULT_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->fault_lock, MA_OWNED) #define DMAR_IS_COHERENT(dmar) (((dmar)->hw_ecap & DMAR_ECAP_C) != 0) #define DMAR_HAS_QI(dmar) (((dmar)->hw_ecap & DMAR_ECAP_QI) != 0) #define DMAR_X2APIC(dmar) \ (x2apic_mode && ((dmar)->hw_ecap & DMAR_ECAP_EIM) != 0) /* Barrier ids */ #define DMAR_BARRIER_RMRR 0 #define DMAR_BARRIER_USEQ 1 struct dmar_unit *dmar_find(device_t dev, bool verbose); struct dmar_unit *dmar_find_hpet(device_t dev, uint16_t *rid); struct dmar_unit *dmar_find_ioapic(u_int apic_id, uint16_t *rid); u_int dmar_nd2mask(u_int nd); bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl); int domain_set_agaw(struct dmar_domain *domain, int mgaw); int dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less); vm_pindex_t pglvl_max_pages(int pglvl); int domain_is_sp_lvl(struct dmar_domain *domain, int lvl); dmar_gaddr_t pglvl_page_size(int total_pglvl, int lvl); dmar_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl); int calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size, dmar_gaddr_t *isizep); struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags); void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags); void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, struct sf_buf **sf); void dmar_unmap_pgtbl(struct sf_buf *sf); int dmar_load_root_entry_ptr(struct dmar_unit *unit); int dmar_inv_ctx_glob(struct dmar_unit *unit); int dmar_inv_iotlb_glob(struct dmar_unit *unit); int dmar_flush_write_bufs(struct dmar_unit *unit); void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst); void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst); void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst); int dmar_enable_translation(struct dmar_unit *unit); int dmar_disable_translation(struct dmar_unit *unit); int dmar_load_irt_ptr(struct dmar_unit *unit); int dmar_enable_ir(struct dmar_unit *unit); int dmar_disable_ir(struct dmar_unit *unit); bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id); void dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id); uint64_t dmar_get_timeout(void); void dmar_update_timeout(uint64_t newval); int dmar_fault_intr(void *arg); void dmar_enable_fault_intr(struct dmar_unit *unit); void dmar_disable_fault_intr(struct dmar_unit *unit); int 
dmar_init_fault_log(struct dmar_unit *unit); void dmar_fini_fault_log(struct dmar_unit *unit); int dmar_qi_intr(void *arg); void dmar_enable_qi_intr(struct dmar_unit *unit); void dmar_disable_qi_intr(struct dmar_unit *unit); int dmar_init_qi(struct dmar_unit *unit); void dmar_fini_qi(struct dmar_unit *unit); void dmar_qi_invalidate_locked(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t size, struct dmar_qi_genseq *psec, bool emit_wait); void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit); void dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt); vm_object_t domain_get_idmap_pgtbl(struct dmar_domain *domain, dmar_gaddr_t maxaddr); void put_idmap_pgtbl(vm_object_t obj); int domain_map_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags); int domain_unmap_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, int flags); void domain_flush_iotlb_sync(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size); int domain_alloc_pgtbl(struct dmar_domain *domain); void domain_free_pgtbl(struct dmar_domain *domain); int dmar_dev_depth(device_t child); void dmar_dev_path(device_t child, int *busno, void *path1, int depth); struct dmar_ctx *dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr); struct dmar_ctx *dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init); struct dmar_ctx *dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, bool id_mapped, bool rmrr_init); int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx); void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); void dmar_free_ctx(struct dmar_ctx *ctx); struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid); void dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free); void dmar_domain_unload(struct dmar_domain *domain, struct dmar_map_entries_tailq *entries, bool cansleep); void dmar_domain_free_entry(struct dmar_map_entry *entry, bool free); int dmar_init_busdma(struct dmar_unit *unit); void dmar_fini_busdma(struct dmar_unit *unit); device_t dmar_get_requester(device_t dev, uint16_t *rid); void dmar_gas_init_domain(struct dmar_domain *domain); void dmar_gas_fini_domain(struct dmar_domain *domain); struct dmar_map_entry *dmar_gas_alloc_entry(struct dmar_domain *domain, u_int flags); void dmar_gas_free_entry(struct dmar_domain *domain, struct dmar_map_entry *entry); void dmar_gas_free_space(struct dmar_domain *domain, struct dmar_map_entry *entry); int dmar_gas_map(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct dmar_map_entry **res); void dmar_gas_free_region(struct dmar_domain *domain, struct dmar_map_entry *entry); int dmar_gas_map_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma); int dmar_gas_reserve_region(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t end); void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, struct dmar_map_entries_tailq *rmrr_entries); int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar); void 
dmar_quirks_post_ident(struct dmar_unit *dmar); void dmar_quirks_pre_use(struct dmar_unit *dmar); int dmar_init_irt(struct dmar_unit *unit); void dmar_fini_irt(struct dmar_unit *unit); void dmar_set_buswide_ctx(struct dmar_unit *unit, u_int busno); bool dmar_is_buswide_ctx(struct dmar_unit *unit, u_int busno); #define DMAR_GM_CANWAIT 0x0001 #define DMAR_GM_CANSPLIT 0x0002 #define DMAR_GM_RMRR 0x0004 #define DMAR_PGF_WAITOK 0x0001 #define DMAR_PGF_ZERO 0x0002 #define DMAR_PGF_ALLOC 0x0004 #define DMAR_PGF_NOALLOC 0x0008 #define DMAR_PGF_OBJL 0x0010 extern dmar_haddr_t dmar_high; extern int haw; extern int dmar_tbl_pagecnt; extern int dmar_batch_coalesce; extern int dmar_check_free; static inline uint32_t dmar_read4(const struct dmar_unit *unit, int reg) { return (bus_read_4(unit->regs, reg)); } static inline uint64_t dmar_read8(const struct dmar_unit *unit, int reg) { #ifdef __i386__ uint32_t high, low; low = bus_read_4(unit->regs, reg); high = bus_read_4(unit->regs, reg + 4); return (low | ((uint64_t)high << 32)); #else return (bus_read_8(unit->regs, reg)); #endif } static inline void dmar_write4(const struct dmar_unit *unit, int reg, uint32_t val) { KASSERT(reg != DMAR_GCMD_REG || (val & DMAR_GCMD_TE) == (unit->hw_gcmd & DMAR_GCMD_TE), ("dmar%d clearing TE 0x%08x 0x%08x", unit->unit, unit->hw_gcmd, val)); bus_write_4(unit->regs, reg, val); } static inline void dmar_write8(const struct dmar_unit *unit, int reg, uint64_t val) { KASSERT(reg != DMAR_GCMD_REG, ("8byte GCMD write")); #ifdef __i386__ uint32_t high, low; low = val; high = val >> 32; bus_write_4(unit->regs, reg, low); bus_write_4(unit->regs, reg + 4, high); #else bus_write_8(unit->regs, reg, val); #endif } /* * dmar_pte_store and dmar_pte_clear ensure that on i386, 32bit writes * are issued in the correct order. For store, the lower word, * containing the P or R and W bits, is set only after the high word * is written. For clear, the P bit is cleared first, then the high * word is cleared. * * dmar_pte_update updates the pte. For amd64, the update is atomic. * For i386, it first disables the entry by clearing the word * containing the P bit, and then defer to dmar_pte_store. The locked * cmpxchg8b is probably available on any machine having DMAR support, * but interrupt translation table may be mapped uncached. 
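That possibility is presumably why the i386 update path below uses two ordered 32-bit stores rather than a locked 64-bit exchange.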
*/ static inline void dmar_pte_store1(volatile uint64_t *dst, uint64_t val) { #ifdef __i386__ volatile uint32_t *p; uint32_t hi, lo; hi = val >> 32; lo = val; p = (volatile uint32_t *)dst; *(p + 1) = hi; *p = lo; #else *dst = val; #endif } static inline void dmar_pte_store(volatile uint64_t *dst, uint64_t val) { KASSERT(*dst == 0, ("used pte %p oldval %jx newval %jx", dst, (uintmax_t)*dst, (uintmax_t)val)); dmar_pte_store1(dst, val); } static inline void dmar_pte_update(volatile uint64_t *dst, uint64_t val) { #ifdef __i386__ volatile uint32_t *p; p = (volatile uint32_t *)dst; *p = 0; #endif dmar_pte_store1(dst, val); } static inline void dmar_pte_clear(volatile uint64_t *dst) { #ifdef __i386__ volatile uint32_t *p; p = (volatile uint32_t *)dst; *p = 0; *(p + 1) = 0; #else *dst = 0; #endif } static inline bool dmar_test_boundary(dmar_gaddr_t start, dmar_gaddr_t size, dmar_gaddr_t boundary) { if (boundary == 0) return (true); return (start + size <= ((start + boundary) & ~(boundary - 1))); } extern struct timespec dmar_hw_timeout; #define DMAR_WAIT_UNTIL(cond) \ { \ struct timespec last, curr; \ bool forever; \ \ if (dmar_hw_timeout.tv_sec == 0 && \ dmar_hw_timeout.tv_nsec == 0) { \ forever = true; \ } else { \ forever = false; \ nanouptime(&curr); \ timespecadd(&curr, &dmar_hw_timeout, &last); \ } \ for (;;) { \ if (cond) { \ error = 0; \ break; \ } \ nanouptime(&curr); \ if (!forever && timespeccmp(&last, &curr, <)) { \ error = ETIMEDOUT; \ break; \ } \ cpu_spinwait(); \ } \ } #ifdef INVARIANTS #define TD_PREP_PINNED_ASSERT \ int old_td_pinned; \ old_td_pinned = curthread->td_pinned #define TD_PINNED_ASSERT \ KASSERT(curthread->td_pinned == old_td_pinned, \ ("pin count leak: %d %d %s:%d", curthread->td_pinned, \ old_td_pinned, __FILE__, __LINE__)) #else #define TD_PREP_PINNED_ASSERT #define TD_PINNED_ASSERT #endif #endif Index: projects/clang1000-import/sys/x86/iommu/intel_drv.c =================================================================== --- projects/clang1000-import/sys/x86/iommu/intel_drv.c (revision 357178) +++ projects/clang1000-import/sys/x86/iommu/intel_drv.c (revision 357179) @@ -1,1344 +1,1344 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #if defined(__amd64__) #define DEV_APIC #else #include "opt_apic.h" #endif #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include "pcib_if.h" #include #include #include #endif #define DMAR_FAULT_IRQ_RID 0 #define DMAR_QI_IRQ_RID 1 #define DMAR_REG_RID 2 static devclass_t dmar_devclass; static device_t *dmar_devs; static int dmar_devcnt; typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *); static void dmar_iterate_tbl(dmar_iter_t iter, void *arg) { ACPI_TABLE_DMAR *dmartbl; ACPI_DMAR_HEADER *dmarh; char *ptr, *ptrend; ACPI_STATUS status; status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); if (ACPI_FAILURE(status)) return; ptr = (char *)dmartbl + sizeof(*dmartbl); ptrend = (char *)dmartbl + dmartbl->Header.Length; for (;;) { if (ptr >= ptrend) break; dmarh = (ACPI_DMAR_HEADER *)ptr; if (dmarh->Length <= 0) { printf("dmar_identify: corrupted DMAR table, l %d\n", dmarh->Length); break; } ptr += dmarh->Length; if (!iter(dmarh, arg)) break; } AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl); } struct find_iter_args { int i; ACPI_DMAR_HARDWARE_UNIT *res; }; static int dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { struct find_iter_args *fia; if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) return (1); fia = arg; if (fia->i == 0) { fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh; return (0); } fia->i--; return (1); } static ACPI_DMAR_HARDWARE_UNIT * dmar_find_by_index(int idx) { struct find_iter_args fia; fia.i = idx; fia.res = NULL; dmar_iterate_tbl(dmar_find_iter, &fia); return (fia.res); } static int dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT) dmar_devcnt++; return (1); } static int dmar_enable = 0; static void dmar_identify(driver_t *driver, device_t parent) { ACPI_TABLE_DMAR *dmartbl; ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_STATUS status; int i, error; if (acpi_disabled("dmar")) return; TUNABLE_INT_FETCH("hw.dmar.enable", &dmar_enable); if (!dmar_enable) return; #ifdef INVARIANTS TUNABLE_INT_FETCH("hw.dmar.check_free", &dmar_check_free); #endif status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); if (ACPI_FAILURE(status)) return; haw = dmartbl->Width + 1; if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR) dmar_high = BUS_SPACE_MAXADDR; else dmar_high = 1ULL << (haw + 1); if (bootverbose) { printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width, (unsigned)dmartbl->Flags, "\020\001INTR_REMAP\002X2APIC_OPT_OUT"); } AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl); dmar_iterate_tbl(dmar_count_iter, NULL); if (dmar_devcnt == 0) return; dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < dmar_devcnt; i++) { dmarh = dmar_find_by_index(i); 
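/* The indexed lookup walks the same ACPI table as the earlier counting pass, so a NULL result here should only mean an inconsistent DMAR table; skip the slot. */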
if (dmarh == NULL) { printf("dmar_identify: cannot find HWUNIT %d\n", i); continue; } dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i); if (dmar_devs[i] == NULL) { printf("dmar_identify: cannot create instance %d\n", i); continue; } error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY, DMAR_REG_RID, dmarh->Address, PAGE_SIZE); if (error != 0) { printf( "dmar%d: unable to alloc register window at 0x%08jx: error %d\n", i, (uintmax_t)dmarh->Address, error); device_delete_child(parent, dmar_devs[i]); dmar_devs[i] = NULL; } } } static int dmar_probe(device_t dev) { if (acpi_get_handle(dev) != NULL) return (ENXIO); device_set_desc(dev, "DMA remap"); return (BUS_PROBE_NOWILDCARD); } static void dmar_release_intr(device_t dev, struct dmar_unit *unit, int idx) { struct dmar_msi_data *dmd; dmd = &unit->intrs[idx]; if (dmd->irq == -1) return; bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)), dev, dmd->irq); dmd->irq = -1; } static void dmar_release_resources(device_t dev, struct dmar_unit *unit) { int i; dmar_fini_busdma(unit); dmar_fini_irt(unit); dmar_fini_qi(unit); dmar_fini_fault_log(unit); for (i = 0; i < DMAR_INTR_TOTAL; i++) dmar_release_intr(dev, unit, i); if (unit->regs != NULL) { bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid, unit->regs); bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid, unit->regs); unit->regs = NULL; } if (unit->domids != NULL) { delete_unrhdr(unit->domids); unit->domids = NULL; } if (unit->ctx_obj != NULL) { vm_object_deallocate(unit->ctx_obj); unit->ctx_obj = NULL; } } static int dmar_alloc_irq(device_t dev, struct dmar_unit *unit, int idx) { device_t pcib; struct dmar_msi_data *dmd; uint64_t msi_addr; uint32_t msi_data; int error; dmd = &unit->intrs[idx]; pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */ error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq); if (error != 0) { device_printf(dev, "cannot allocate %s interrupt, %d\n", dmd->name, error); goto err1; } error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq, 1); if (error != 0) { device_printf(dev, "cannot set %s interrupt resource, %d\n", dmd->name, error); goto err2; } dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &dmd->irq_rid, RF_ACTIVE); if (dmd->irq_res == NULL) { device_printf(dev, "cannot allocate resource for %s interrupt\n", dmd->name); error = ENXIO; goto err3; } error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC, dmd->handler, NULL, unit, &dmd->intr_handle); if (error != 0) { device_printf(dev, "cannot setup %s interrupt, %d\n", dmd->name, error); goto err4; } bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name); error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data); if (error != 0) { device_printf(dev, "cannot map %s interrupt, %d\n", dmd->name, error); goto err5; } dmar_write4(unit, dmd->msi_data_reg, msi_data); dmar_write4(unit, dmd->msi_addr_reg, msi_addr); /* Only for xAPIC mode */ dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); return (0); err5: bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); err4: bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); err3: bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); err2: PCIB_RELEASE_MSIX(pcib, dev, dmd->irq); dmd->irq = -1; err1: return (error); } #ifdef DEV_APIC static int dmar_remap_intr(device_t dev, device_t child, u_int irq) { struct 
dmar_unit *unit; struct dmar_msi_data *dmd; uint64_t msi_addr; uint32_t msi_data; int i, error; unit = device_get_softc(dev); for (i = 0; i < DMAR_INTR_TOTAL; i++) { dmd = &unit->intrs[i]; if (irq == dmd->irq) { error = PCIB_MAP_MSI(device_get_parent( device_get_parent(dev)), dev, irq, &msi_addr, &msi_data); if (error != 0) return (error); DMAR_LOCK(unit); (dmd->disable_intr)(unit); dmar_write4(unit, dmd->msi_data_reg, msi_data); dmar_write4(unit, dmd->msi_addr_reg, msi_addr); dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); (dmd->enable_intr)(unit); DMAR_UNLOCK(unit); return (0); } } return (ENOENT); } #endif static void dmar_print_caps(device_t dev, struct dmar_unit *unit, ACPI_DMAR_HARDWARE_UNIT *dmaru) { uint32_t caphi, ecaphi; device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, flags=<%b>\n", (uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver), DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment, dmaru->Flags, "\020\001INCLUDE_ALL_PCI"); caphi = unit->hw_cap >> 32; device_printf(dev, "cap=%b,", (u_int)unit->hw_cap, "\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH"); printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD\031FL1GP\034PSI"); printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d", DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap), DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap), DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap)); if ((unit->hw_cap & DMAR_CAP_PSI) != 0) printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap)); printf("\n"); ecaphi = unit->hw_ecap >> 32; device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap, "\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC\031ECS\032MTS" "\033NEST\034DIS\035PASID\036PRS\037ERS\040SRS"); printf("%b, ", ecaphi, "\020\002NWFS\003EAFS"); printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap), DMAR_ECAP_IRO(unit->hw_ecap)); } static int dmar_attach(device_t dev) { struct dmar_unit *unit; ACPI_DMAR_HARDWARE_UNIT *dmaru; uint64_t timeout; int i, error; unit = device_get_softc(dev); unit->dev = dev; unit->unit = device_get_unit(dev); dmaru = dmar_find_by_index(unit->unit); if (dmaru == NULL) return (EINVAL); unit->segment = dmaru->Segment; unit->base = dmaru->Address; unit->reg_rid = DMAR_REG_RID; unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &unit->reg_rid, RF_ACTIVE); if (unit->regs == NULL) { device_printf(dev, "cannot allocate register window\n"); return (ENOMEM); } unit->hw_ver = dmar_read4(unit, DMAR_VER_REG); unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG); unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG); if (bootverbose) dmar_print_caps(dev, unit, dmaru); dmar_quirks_post_ident(unit); timeout = dmar_get_timeout(); TUNABLE_UINT64_FETCH("hw.dmar.timeout", &timeout); dmar_update_timeout(timeout); for (i = 0; i < DMAR_INTR_TOTAL; i++) unit->intrs[i].irq = -1; unit->intrs[DMAR_INTR_FAULT].name = "fault"; unit->intrs[DMAR_INTR_FAULT].irq_rid = DMAR_FAULT_IRQ_RID; unit->intrs[DMAR_INTR_FAULT].handler = dmar_fault_intr; unit->intrs[DMAR_INTR_FAULT].msi_data_reg = DMAR_FEDATA_REG; unit->intrs[DMAR_INTR_FAULT].msi_addr_reg = DMAR_FEADDR_REG; unit->intrs[DMAR_INTR_FAULT].msi_uaddr_reg = DMAR_FEUADDR_REG; unit->intrs[DMAR_INTR_FAULT].enable_intr = dmar_enable_fault_intr; unit->intrs[DMAR_INTR_FAULT].disable_intr = dmar_disable_fault_intr; error = dmar_alloc_irq(dev, unit, DMAR_INTR_FAULT); if (error != 0) { dmar_release_resources(dev, unit); return (error); } if (DMAR_HAS_QI(unit)) { unit->intrs[DMAR_INTR_QI].name = "qi"; unit->intrs[DMAR_INTR_QI].irq_rid = DMAR_QI_IRQ_RID; 
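/* QI interrupt setup mirrors the fault interrupt above, but is backed by the invalidation event (IE*) register set. */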
unit->intrs[DMAR_INTR_QI].handler = dmar_qi_intr; unit->intrs[DMAR_INTR_QI].msi_data_reg = DMAR_IEDATA_REG; unit->intrs[DMAR_INTR_QI].msi_addr_reg = DMAR_IEADDR_REG; unit->intrs[DMAR_INTR_QI].msi_uaddr_reg = DMAR_IEUADDR_REG; unit->intrs[DMAR_INTR_QI].enable_intr = dmar_enable_qi_intr; unit->intrs[DMAR_INTR_QI].disable_intr = dmar_disable_qi_intr; error = dmar_alloc_irq(dev, unit, DMAR_INTR_QI); if (error != 0) { dmar_release_resources(dev, unit); return (error); } } mtx_init(&unit->lock, "dmarhw", NULL, MTX_DEF); unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)), &unit->lock); LIST_INIT(&unit->domains); /* * 9.2 "Context Entry": * When Caching Mode (CM) field is reported as Set, the * domain-id value of zero is architecturally reserved. * Software must not use domain-id value of zero * when CM is Set. */ if ((unit->hw_cap & DMAR_CAP_CM) != 0) alloc_unr_specific(unit->domids, 0); unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 + DMAR_CTX_CNT), 0, 0, NULL); /* * Allocate and load the root entry table pointer. Enable the * address translation after the required invalidations are * done. */ dmar_pgalloc(unit->ctx_obj, 0, DMAR_PGF_WAITOK | DMAR_PGF_ZERO); DMAR_LOCK(unit); error = dmar_load_root_entry_ptr(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } error = dmar_inv_ctx_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) { error = dmar_inv_iotlb_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } } DMAR_UNLOCK(unit); error = dmar_init_fault_log(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_qi(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_irt(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_busdma(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } #ifdef NOTYET DMAR_LOCK(unit); error = dmar_enable_translation(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } DMAR_UNLOCK(unit); #endif return (0); } static int dmar_detach(device_t dev) { return (EBUSY); } static int dmar_suspend(device_t dev) { return (0); } static int dmar_resume(device_t dev) { /* XXXKIB */ return (0); } static device_method_t dmar_methods[] = { DEVMETHOD(device_identify, dmar_identify), DEVMETHOD(device_probe, dmar_probe), DEVMETHOD(device_attach, dmar_attach), DEVMETHOD(device_detach, dmar_detach), DEVMETHOD(device_suspend, dmar_suspend), DEVMETHOD(device_resume, dmar_resume), #ifdef DEV_APIC DEVMETHOD(bus_remap_intr, dmar_remap_intr), #endif DEVMETHOD_END }; static driver_t dmar_driver = { "dmar", dmar_methods, sizeof(struct dmar_unit), }; DRIVER_MODULE(dmar, acpi, dmar_driver, dmar_devclass, 0, 0); MODULE_DEPEND(dmar, acpi, 1, 1, 1); void dmar_set_buswide_ctx(struct dmar_unit *unit, u_int busno) { MPASS(busno <= PCI_BUSMAX); DMAR_LOCK(unit); unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |= 1 << (busno % (NBBY * sizeof(uint32_t))); DMAR_UNLOCK(unit); } bool dmar_is_buswide_ctx(struct dmar_unit *unit, u_int busno) { MPASS(busno <= PCI_BUSMAX); return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] & (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0); } static void dmar_print_path(int busno, int depth, const ACPI_DMAR_PCI_PATH *path) { int i; printf("[%d, ", busno); for (i = 
0; i < depth; i++) { if (i != 0) printf(", "); printf("(%d, %d)", path[i].Device, path[i].Function); } printf("]"); } int dmar_dev_depth(device_t child) { devclass_t pci_class; device_t bus, pcib; int depth; pci_class = devclass_find("pci"); for (depth = 1; ; depth++) { bus = device_get_parent(child); pcib = device_get_parent(bus); if (device_get_devclass(device_get_parent(pcib)) != pci_class) return (depth); child = pcib; } } void dmar_dev_path(device_t child, int *busno, void *path1, int depth) { devclass_t pci_class; device_t bus, pcib; ACPI_DMAR_PCI_PATH *path; pci_class = devclass_find("pci"); path = path1; for (depth--; depth != -1; depth--) { path[depth].Device = pci_get_slot(child); path[depth].Function = pci_get_function(child); bus = device_get_parent(child); pcib = device_get_parent(bus); if (device_get_devclass(device_get_parent(pcib)) != pci_class) { /* reached a host bridge */ *busno = pcib_get_bus(bus); return; } child = pcib; } panic("wrong depth"); } static int dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1, int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2, enum AcpiDmarScopeType scope_type) { int i, depth; if (busno1 != busno2) return (0); if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2) return (0); depth = depth1; if (depth2 < depth) depth = depth2; for (i = 0; i < depth; i++) { if (path1[i].Device != path2[i].Device || path1[i].Function != path2[i].Function) return (0); } return (1); } static int dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len) { ACPI_DMAR_PCI_PATH *path; int path_len; if (devscope->Length < sizeof(*devscope)) { printf("dmar_match_devscope: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE) return (0); path_len = devscope->Length - sizeof(*devscope); if (path_len % 2 != 0) { printf("dmar_match_devscope: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } path_len /= 2; path = (ACPI_DMAR_PCI_PATH *)(devscope + 1); if (path_len == 0) { printf("dmar_match_devscope: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno, dev_path, dev_path_len, devscope->EntryType)); } static bool dmar_match_by_path(struct dmar_unit *unit, int dev_domain, int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len, const char **banner) { ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_DMAR_DEVICE_SCOPE *devscope; char *ptr, *ptrend; int match; dmarh = dmar_find_by_index(unit->unit); if (dmarh == NULL) return (false); if (dmarh->Segment != dev_domain) return (false); if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) { if (banner != NULL) *banner = "INCLUDE_ALL"; return (true); } ptr = (char *)dmarh + sizeof(*dmarh); ptrend = (char *)dmarh + dmarh->Header.Length; while (ptr < ptrend) { devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; match = dmar_match_devscope(devscope, dev_busno, dev_path, dev_path_len); if (match == -1) return (false); if (match == 1) { if (banner != NULL) *banner = "specific match"; return (true); } } return (false); } static struct dmar_unit * dmar_find_by_scope(int dev_domain, int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len) { struct dmar_unit *unit; int i; for (i = 0; i < dmar_devcnt; i++) { if (dmar_devs[i] == NULL) continue; unit = device_get_softc(dmar_devs[i]); if 
(dmar_match_by_path(unit, dev_domain, dev_busno, dev_path, dev_path_len, NULL)) return (unit); } return (NULL); } struct dmar_unit * dmar_find(device_t dev, bool verbose) { device_t dmar_dev; struct dmar_unit *unit; const char *banner; int i, dev_domain, dev_busno, dev_path_len; /* * This function can only handle PCI(e) devices. */ if (device_get_devclass(device_get_parent(dev)) != devclass_find("pci")) return (NULL); dmar_dev = NULL; dev_domain = pci_get_domain(dev); dev_path_len = dmar_dev_depth(dev); ACPI_DMAR_PCI_PATH dev_path[dev_path_len]; dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len); banner = ""; for (i = 0; i < dmar_devcnt; i++) { if (dmar_devs[i] == NULL) continue; unit = device_get_softc(dmar_devs[i]); if (dmar_match_by_path(unit, dev_domain, dev_busno, dev_path, dev_path_len, &banner)) break; } if (i == dmar_devcnt) return (NULL); if (verbose) { device_printf(dev, "pci%d:%d:%d:%d matched dmar%d by %s", dev_domain, pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), unit->unit, banner); printf(" scope path "); dmar_print_path(dev_busno, dev_path_len, dev_path); printf("\n"); } return (unit); } static struct dmar_unit * dmar_find_nonpci(u_int id, u_int entry_type, uint16_t *rid) { device_t dmar_dev; struct dmar_unit *unit; ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_DMAR_DEVICE_SCOPE *devscope; ACPI_DMAR_PCI_PATH *path; char *ptr, *ptrend; #ifdef DEV_APIC int error; #endif int i; for (i = 0; i < dmar_devcnt; i++) { dmar_dev = dmar_devs[i]; if (dmar_dev == NULL) continue; unit = (struct dmar_unit *)device_get_softc(dmar_dev); dmarh = dmar_find_by_index(i); if (dmarh == NULL) continue; ptr = (char *)dmarh + sizeof(*dmarh); ptrend = (char *)dmarh + dmarh->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; if (devscope->EntryType != entry_type) continue; if (devscope->EnumerationId != id) continue; #ifdef DEV_APIC if (entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { error = ioapic_get_rid(id, rid); /* * If our IOAPIC has PCI bindings then * use the PCI device rid. 
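Otherwise fall through and derive the rid from the device scope path, as for any other non-PCI entry.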
*/ if (error == 0) return (unit); } #endif if (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE) == 2) { if (rid != NULL) { path = (ACPI_DMAR_PCI_PATH *) (devscope + 1); *rid = PCI_RID(devscope->Bus, path->Device, path->Function); } return (unit); } printf( "dmar_find_nonpci: id %d type %d path length != 2\n", id, entry_type); break; } } return (NULL); } struct dmar_unit * dmar_find_hpet(device_t dev, uint16_t *rid) { return (dmar_find_nonpci(hpet_get_uid(dev), ACPI_DMAR_SCOPE_TYPE_HPET, rid)); } struct dmar_unit * dmar_find_ioapic(u_int apic_id, uint16_t *rid) { return (dmar_find_nonpci(apic_id, ACPI_DMAR_SCOPE_TYPE_IOAPIC, rid)); } struct rmrr_iter_args { struct dmar_domain *domain; int dev_domain; int dev_busno; const ACPI_DMAR_PCI_PATH *dev_path; int dev_path_len; struct dmar_map_entries_tailq *rmrr_entries; }; static int dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { struct rmrr_iter_args *ria; ACPI_DMAR_RESERVED_MEMORY *resmem; ACPI_DMAR_DEVICE_SCOPE *devscope; struct dmar_map_entry *entry; char *ptr, *ptrend; int match; if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY) return (1); ria = arg; resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh; if (resmem->Segment != ria->dev_domain) return (1); ptr = (char *)resmem + sizeof(*resmem); ptrend = (char *)resmem + resmem->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; match = dmar_match_devscope(devscope, ria->dev_busno, ria->dev_path, ria->dev_path_len); if (match == 1) { entry = dmar_gas_alloc_entry(ria->domain, DMAR_PGF_WAITOK); entry->start = resmem->BaseAddress; /* The RMRR entry end address is inclusive. */ entry->end = resmem->EndAddress; TAILQ_INSERT_TAIL(ria->rmrr_entries, entry, unroll_link); } } return (1); } void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, struct dmar_map_entries_tailq *rmrr_entries) { struct rmrr_iter_args ria; ria.domain = domain; ria.dev_domain = dev_domain; ria.dev_busno = dev_busno; ria.dev_path = (const ACPI_DMAR_PCI_PATH *)dev_path; ria.dev_path_len = dev_path_len; ria.rmrr_entries = rmrr_entries; dmar_iterate_tbl(dmar_rmrr_iter, &ria); } struct inst_rmrr_iter_args { struct dmar_unit *dmar; }; static device_t dmar_path_dev(int segment, int path_len, int busno, const ACPI_DMAR_PCI_PATH *path, uint16_t *rid) { device_t dev; int i; dev = NULL; for (i = 0; i < path_len; i++) { dev = pci_find_dbsf(segment, busno, path->Device, path->Function); if (i != path_len - 1) { busno = pci_cfgregread(busno, path->Device, path->Function, PCIR_SECBUS_1, 1); path++; } } *rid = PCI_RID(busno, path->Device, path->Function); return (dev); } static int dmar_inst_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { const ACPI_DMAR_RESERVED_MEMORY *resmem; const ACPI_DMAR_DEVICE_SCOPE *devscope; struct inst_rmrr_iter_args *iria; const char *ptr, *ptrend; device_t dev; struct dmar_unit *unit; int dev_path_len; uint16_t rid; iria = arg; if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY) return (1); resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh; if (resmem->Segment != iria->dmar->segment) return (1); ptr = (const char *)resmem + sizeof(*resmem); ptrend = (const char *)resmem + resmem->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (const ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; /* XXXKIB bridge */ if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT) continue; rid = 0; dev_path_len = (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2; dev = 
dmar_path_dev(resmem->Segment, dev_path_len, devscope->Bus, (const ACPI_DMAR_PCI_PATH *)(devscope + 1), &rid); if (dev == NULL) { if (bootverbose) { printf("dmar%d no dev found for RMRR " "[%#jx, %#jx] rid %#x scope path ", iria->dmar->unit, (uintmax_t)resmem->BaseAddress, (uintmax_t)resmem->EndAddress, rid); dmar_print_path(devscope->Bus, dev_path_len, (const ACPI_DMAR_PCI_PATH *)(devscope + 1)); printf("\n"); } unit = dmar_find_by_scope(resmem->Segment, devscope->Bus, (const ACPI_DMAR_PCI_PATH *)(devscope + 1), dev_path_len); if (iria->dmar != unit) continue; dmar_get_ctx_for_devpath(iria->dmar, rid, resmem->Segment, devscope->Bus, (const ACPI_DMAR_PCI_PATH *)(devscope + 1), dev_path_len, false, true); } else { unit = dmar_find(dev, false); if (iria->dmar != unit) continue; dmar_instantiate_ctx(iria->dmar, dev, true); } } return (1); } /* * Pre-create all contexts for the DMAR which have RMRR entries. */ int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar) { struct inst_rmrr_iter_args iria; int error; if (!dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR)) return (0); error = 0; iria.dmar = dmar; dmar_iterate_tbl(dmar_inst_rmrr_iter, &iria); DMAR_LOCK(dmar); if (!LIST_EMPTY(&dmar->domains)) { KASSERT((dmar->hw_gcmd & DMAR_GCMD_TE) == 0, ("dmar%d: RMRR not handled but translation is already enabled", dmar->unit)); error = dmar_enable_translation(dmar); if (bootverbose) { if (error == 0) { printf("dmar%d: enabled translation\n", dmar->unit); } else { printf("dmar%d: enabling translation failed, " "error %d\n", dmar->unit, error); } } } dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR); return (error); } #ifdef DDB #include #include static void dmar_print_domain_entry(const struct dmar_map_entry *entry) { struct dmar_map_entry *l, *r; db_printf( - " start %jx end %jx free_after %jx free_down %jx flags %x ", - entry->start, entry->end, entry->free_after, entry->free_down, - entry->flags); + " start %jx end %jx first %jx last %jx free_down %jx flags %x ", + entry->start, entry->end, entry->first, entry->last, + entry->free_down, entry->flags); db_printf("left "); l = RB_LEFT(entry, rb_entry); if (l == NULL) db_printf("NULL "); else db_printf("%jx ", l->start); db_printf("right "); r = RB_RIGHT(entry, rb_entry); if (r == NULL) db_printf("NULL"); else db_printf("%jx", r->start); db_printf("\n"); } static void dmar_print_ctx(struct dmar_ctx *ctx) { db_printf( " @%p pci%d:%d:%d refs %d flags %x loads %lu unloads %lu\n", ctx, pci_get_bus(ctx->ctx_tag.owner), pci_get_slot(ctx->ctx_tag.owner), pci_get_function(ctx->ctx_tag.owner), ctx->refs, ctx->flags, ctx->loads, ctx->unloads); } static void dmar_print_domain(struct dmar_domain *domain, bool show_mappings) { struct dmar_map_entry *entry; struct dmar_ctx *ctx; db_printf( " @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n" " ctx_cnt %d flags %x pgobj %p map_ents %u\n", domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl, (uintmax_t)domain->end, domain->refs, domain->ctx_cnt, domain->flags, domain->pgtbl_obj, domain->entries_cnt); if (!LIST_EMPTY(&domain->contexts)) { db_printf(" Contexts:\n"); LIST_FOREACH(ctx, &domain->contexts, link) dmar_print_ctx(ctx); } if (!show_mappings) return; db_printf(" mapped:\n"); RB_FOREACH(entry, dmar_gas_entries_tree, &domain->rb_root) { dmar_print_domain_entry(entry); if (db_pager_quit) break; } if (db_pager_quit) return; db_printf(" unloading:\n"); TAILQ_FOREACH(entry, &domain->unload_entries, dmamap_link) { dmar_print_domain_entry(entry); if (db_pager_quit) break; } } DB_FUNC(dmar_domain, 
db_dmar_print_domain, db_show_table, CS_OWN, NULL) { struct dmar_unit *unit; struct dmar_domain *domain; struct dmar_ctx *ctx; bool show_mappings, valid; int pci_domain, bus, device, function, i, t; db_expr_t radix; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tSLASH) { t = db_read_token(); if (t != tIDENT) { db_printf("Bad modifier\n"); db_radix = radix; db_skip_to_eol(); return; } show_mappings = strchr(db_tok_string, 'm') != NULL; t = db_read_token(); } else { show_mappings = false; } if (t == tNUMBER) { pci_domain = db_tok_number; t = db_read_token(); if (t == tNUMBER) { bus = db_tok_number; t = db_read_token(); if (t == tNUMBER) { device = db_tok_number; t = db_read_token(); if (t == tNUMBER) { function = db_tok_number; valid = true; } } } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show dmar_domain [/m] " " \n"); return; } for (i = 0; i < dmar_devcnt; i++) { unit = device_get_softc(dmar_devs[i]); LIST_FOREACH(domain, &unit->domains, link) { LIST_FOREACH(ctx, &domain->contexts, link) { if (pci_domain == unit->segment && bus == pci_get_bus(ctx->ctx_tag.owner) && device == pci_get_slot(ctx->ctx_tag.owner) && function == pci_get_function(ctx->ctx_tag.owner)) { dmar_print_domain(domain, show_mappings); goto out; } } } } out:; } static void dmar_print_one(int idx, bool show_domains, bool show_mappings) { struct dmar_unit *unit; struct dmar_domain *domain; int i, frir; unit = device_get_softc(dmar_devs[idx]); db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->unit, unit, dmar_read8(unit, DMAR_RTADDR_REG), dmar_read4(unit, DMAR_VER_REG)); db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n", (uintmax_t)dmar_read8(unit, DMAR_CAP_REG), (uintmax_t)dmar_read8(unit, DMAR_ECAP_REG), dmar_read4(unit, DMAR_GSTS_REG), dmar_read4(unit, DMAR_FSTS_REG), dmar_read4(unit, DMAR_FECTL_REG)); if (unit->ir_enabled) { db_printf("ir is enabled; IRT @%p phys 0x%jx maxcnt %d\n", unit->irt, (uintmax_t)unit->irt_phys, unit->irte_cnt); } db_printf("fed 0x%x fea 0x%x feua 0x%x\n", dmar_read4(unit, DMAR_FEDATA_REG), dmar_read4(unit, DMAR_FEADDR_REG), dmar_read4(unit, DMAR_FEUADDR_REG)); db_printf("primary fault log:\n"); for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) { frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16; db_printf(" %d at 0x%x: %jx %jx\n", i, frir, (uintmax_t)dmar_read8(unit, frir), (uintmax_t)dmar_read8(unit, frir + 8)); } if (DMAR_HAS_QI(unit)) { db_printf("ied 0x%x iea 0x%x ieua 0x%x\n", dmar_read4(unit, DMAR_IEDATA_REG), dmar_read4(unit, DMAR_IEADDR_REG), dmar_read4(unit, DMAR_IEUADDR_REG)); if (unit->qi_enabled) { db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) " "size 0x%jx\n" " head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n" " hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n", (uintmax_t)unit->inv_queue, (uintmax_t)dmar_read8(unit, DMAR_IQA_REG), (uintmax_t)unit->inv_queue_size, dmar_read4(unit, DMAR_IQH_REG), dmar_read4(unit, DMAR_IQT_REG), unit->inv_queue_avail, dmar_read4(unit, DMAR_ICS_REG), dmar_read4(unit, DMAR_IECTL_REG), unit->inv_waitd_seq_hw, &unit->inv_waitd_seq_hw, (uintmax_t)unit->inv_waitd_seq_hw_phys, unit->inv_waitd_seq, unit->inv_waitd_gen); } else { db_printf("qi is disabled\n"); } } if (show_domains) { db_printf("domains:\n"); LIST_FOREACH(domain, &unit->domains, link) { dmar_print_domain(domain, show_mappings); if (db_pager_quit) break; } } } DB_SHOW_COMMAND(dmar, db_dmar_print) { bool show_domains, show_mappings; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 
'm') != NULL; if (!have_addr) { db_printf("usage: show dmar [/d] [/m] index\n"); return; } dmar_print_one((int)addr, show_domains, show_mappings); } DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars) { int i; bool show_domains, show_mappings; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 'm') != NULL; for (i = 0; i < dmar_devcnt; i++) { dmar_print_one(i, show_domains, show_mappings); if (db_pager_quit) break; } } #endif Index: projects/clang1000-import/sys/x86/iommu/intel_gas.c =================================================================== --- projects/clang1000-import/sys/x86/iommu/intel_gas.c (revision 357178) +++ projects/clang1000-import/sys/x86/iommu/intel_gas.c (revision 357179) @@ -1,745 +1,684 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define RB_AUGMENT(entry) dmar_gas_augment_entry(entry) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Guest Address Space management. */ static uma_zone_t dmar_map_entry_zone; static void intel_gas_init(void) { dmar_map_entry_zone = uma_zcreate("DMAR_MAP_ENTRY", sizeof(struct dmar_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP); } SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL); struct dmar_map_entry * dmar_gas_alloc_entry(struct dmar_domain *domain, u_int flags) { struct dmar_map_entry *res; KASSERT((flags & ~(DMAR_PGF_WAITOK)) == 0, ("unsupported flags %x", flags)); res = uma_zalloc(dmar_map_entry_zone, ((flags & DMAR_PGF_WAITOK) != 0 ? 
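/* DMAR_PGF_WAITOK from the caller selects UMA's M_WAITOK so the zone allocation may sleep; otherwise M_NOWAIT keeps this path safe in non-sleepable contexts. */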
M_WAITOK : M_NOWAIT) | M_ZERO); if (res != NULL) { res->domain = domain; atomic_add_int(&domain->entries_cnt, 1); } return (res); } void dmar_gas_free_entry(struct dmar_domain *domain, struct dmar_map_entry *entry) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); atomic_subtract_int(&domain->entries_cnt, 1); uma_zfree(dmar_map_entry_zone, entry); } static int dmar_gas_cmp_entries(struct dmar_map_entry *a, struct dmar_map_entry *b) { /* Last entry has zero size, so <= */ KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end)); KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)", b, (uintmax_t)b->start, (uintmax_t)b->end)); KASSERT(a->end <= b->start || b->end <= a->start || a->end == a->start || b->end == b->start, ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end, b, (uintmax_t)b->start, (uintmax_t)b->end)); if (a->end < b->end) return (-1); else if (b->end < a->end) return (1); return (0); } static void dmar_gas_augment_entry(struct dmar_map_entry *entry) { - struct dmar_map_entry *l, *r; + struct dmar_map_entry *child; + dmar_gaddr_t free_down; - for (; entry != NULL; entry = RB_PARENT(entry, rb_entry)) { - l = RB_LEFT(entry, rb_entry); - r = RB_RIGHT(entry, rb_entry); - if (l == NULL && r == NULL) { - entry->free_down = entry->free_after; - } else if (l == NULL && r != NULL) { - entry->free_down = MAX(entry->free_after, r->free_down); - } else if (/*l != NULL && */ r == NULL) { - entry->free_down = MAX(entry->free_after, l->free_down); - } else /* if (l != NULL && r != NULL) */ { - entry->free_down = MAX(entry->free_after, l->free_down); - entry->free_down = MAX(entry->free_down, r->free_down); - } - } + free_down = 0; + if ((child = RB_LEFT(entry, rb_entry)) != NULL) { + free_down = MAX(free_down, child->free_down); + free_down = MAX(free_down, entry->start - child->last); + entry->first = child->first; + } else + entry->first = entry->start; + + if ((child = RB_RIGHT(entry, rb_entry)) != NULL) { + free_down = MAX(free_down, child->free_down); + free_down = MAX(free_down, child->first - entry->end); + entry->last = child->last; + } else + entry->last = entry->end; + entry->free_down = free_down; } RB_GENERATE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, dmar_gas_cmp_entries); -static void -dmar_gas_fix_free(struct dmar_domain *domain, struct dmar_map_entry *entry) -{ - struct dmar_map_entry *next; - - next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); - entry->free_after = (next != NULL ? 
next->start : domain->end) - - entry->end; - dmar_gas_augment_entry(entry); -} - #ifdef INVARIANTS static void dmar_gas_check_free(struct dmar_domain *domain) { - struct dmar_map_entry *entry, *next, *l, *r; + struct dmar_map_entry *entry, *l, *r; dmar_gaddr_t v; RB_FOREACH(entry, dmar_gas_entries_tree, &domain->rb_root) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); - next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); - if (next == NULL) { - MPASS(entry->free_after == domain->end - entry->end); - } else { - MPASS(entry->free_after = next->start - entry->end); - MPASS(entry->end <= next->start); - } l = RB_LEFT(entry, rb_entry); r = RB_RIGHT(entry, rb_entry); - if (l == NULL && r == NULL) { - MPASS(entry->free_down == entry->free_after); - } else if (l == NULL && r != NULL) { - MPASS(entry->free_down = MAX(entry->free_after, - r->free_down)); - } else if (r == NULL) { - MPASS(entry->free_down = MAX(entry->free_after, - l->free_down)); - } else { - v = MAX(entry->free_after, l->free_down); + v = 0; + if (l != NULL) { + v = MAX(v, l->free_down); + v = MAX(v, entry->start - l->last); + } + if (r != NULL) { v = MAX(v, r->free_down); - MPASS(entry->free_down == v); + v = MAX(v, r->first - entry->end); } + MPASS(entry->free_down == v); } } #endif static bool dmar_gas_rb_insert(struct dmar_domain *domain, struct dmar_map_entry *entry) { - struct dmar_map_entry *prev, *found; + struct dmar_map_entry *found; found = RB_INSERT(dmar_gas_entries_tree, &domain->rb_root, entry); - dmar_gas_fix_free(domain, entry); - prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); - if (prev != NULL) - dmar_gas_fix_free(domain, prev); return (found == NULL); } static void dmar_gas_rb_remove(struct dmar_domain *domain, struct dmar_map_entry *entry) { - struct dmar_map_entry *prev; - prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); - if (prev != NULL) - dmar_gas_fix_free(domain, prev); } void dmar_gas_init_domain(struct dmar_domain *domain) { struct dmar_map_entry *begin, *end; begin = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); end = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); DMAR_DOMAIN_LOCK(domain); KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain)); KASSERT(RB_EMPTY(&domain->rb_root), ("non-empty entries %p", domain)); begin->start = 0; begin->end = DMAR_PAGE_SIZE; - begin->free_after = domain->end - begin->end; begin->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED; dmar_gas_rb_insert(domain, begin); end->start = domain->end; end->end = domain->end; - end->free_after = 0; end->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED; dmar_gas_rb_insert(domain, end); domain->first_place = begin; domain->last_place = end; domain->flags |= DMAR_DOMAIN_GAS_INITED; DMAR_DOMAIN_UNLOCK(domain); } void dmar_gas_fini_domain(struct dmar_domain *domain) { struct dmar_map_entry *entry, *entry1; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT(domain->entries_cnt == 2, ("domain still in use %p", domain)); entry = RB_MIN(dmar_gas_entries_tree, &domain->rb_root); KASSERT(entry->start == 0, ("start entry start %p", domain)); KASSERT(entry->end == DMAR_PAGE_SIZE, ("start entry end %p", domain)); KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, ("start entry flags %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); entry = RB_MAX(dmar_gas_entries_tree, &domain->rb_root); 
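/* RB_MAX must yield the end-of-domain placeholder created by dmar_gas_init_domain(); the assertions below verify that it still spans exactly [domain->end, domain->end) before it is freed. */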
KASSERT(entry->start == domain->end, ("end entry start %p", domain)); KASSERT(entry->end == domain->end, ("end entry end %p", domain)); - KASSERT(entry->free_after == 0, ("end entry free_after %p", domain)); KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, ("end entry flags %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); RB_FOREACH_SAFE(entry, dmar_gas_entries_tree, &domain->rb_root, entry1) { KASSERT((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0, ("non-RMRR entry left %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); } } struct dmar_gas_match_args { struct dmar_domain *domain; dmar_gaddr_t size; int offset; const struct bus_dma_tag_common *common; u_int gas_flags; struct dmar_map_entry *entry; }; +/* + * The interval [beg, end) is a free interval between two dmar_map_entries. + * maxaddr is an upper bound on addresses that can be allocated. Try to + * allocate space in the free interval, subject to the conditions expressed + * by a, and return 'true' if and only if the allocation attempt succeeds. + */ static bool -dmar_gas_match_one(struct dmar_gas_match_args *a, struct dmar_map_entry *prev, - dmar_gaddr_t end) +dmar_gas_match_one(struct dmar_gas_match_args *a, dmar_gaddr_t beg, + dmar_gaddr_t end, dmar_gaddr_t maxaddr) { dmar_gaddr_t bs, start; - if (a->entry->start + a->size > end) + a->entry->start = roundup2(beg + DMAR_PAGE_SIZE, + a->common->alignment); + if (a->entry->start + a->size > maxaddr) return (false); /* DMAR_PAGE_SIZE to create gap after new entry. */ - if (a->entry->start < prev->end + DMAR_PAGE_SIZE || - a->entry->start + a->size + a->offset + DMAR_PAGE_SIZE > - prev->end + prev->free_after) + if (a->entry->start < beg + DMAR_PAGE_SIZE || + a->entry->start + a->size + a->offset + DMAR_PAGE_SIZE > end) return (false); /* No boundary crossing. */ if (dmar_test_boundary(a->entry->start + a->offset, a->size, a->common->boundary)) return (true); /* * The start + offset to start + offset + size region crosses * the boundary. Check if there is enough space after the - * next boundary after the prev->end. + * next boundary after the beg. */ bs = rounddown2(a->entry->start + a->offset + a->common->boundary, a->common->boundary); start = roundup2(bs, a->common->alignment); /* DMAR_PAGE_SIZE to create gap after new entry. */ - if (start + a->offset + a->size + DMAR_PAGE_SIZE <= - prev->end + prev->free_after && - start + a->offset + a->size <= end && + if (start + a->offset + a->size + DMAR_PAGE_SIZE <= end && + start + a->offset + a->size <= maxaddr && dmar_test_boundary(start + a->offset, a->size, a->common->boundary)) { a->entry->start = start; return (true); } /* * Not enough space to align at the requested boundary, or * boundary is smaller than the size, but allowed to split. - * We already checked that start + size does not overlap end. + * We already checked that start + size does not overlap maxaddr. * * XXXKIB. It is possible that bs is exactly at the start of * the next entry, then we do not have gap. Ignore for now. */ if ((a->gas_flags & DMAR_GM_CANSPLIT) != 0) { a->size = bs - a->entry->start; return (true); } return (false); } static void -dmar_gas_match_insert(struct dmar_gas_match_args *a, - struct dmar_map_entry *prev) +dmar_gas_match_insert(struct dmar_gas_match_args *a) { - struct dmar_map_entry *next; bool found; /* * The prev->end is always aligned on the page size, which * causes page alignment for the entry->start too. 
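* (Hypothetical numbers: with prev->end == 0x2000, a 0x1000 page size and size == 0x1000, the new entry becomes [0x3000, 0x4000) and the page [0x2000, 0x3000) is left unmapped as the guard gap.)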
The size * is checked to be multiple of the page size. * * The page sized gap is created between consequent * allocations to ensure that out-of-bounds accesses fault. */ a->entry->end = a->entry->start + a->size; - next = RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, prev); - KASSERT(next->start >= a->entry->end && - next->start - a->entry->start >= a->size && - prev->end <= a->entry->end, - ("dmar_gas_match_insert hole failed %p prev (%jx, %jx) " - "free_after %jx next (%jx, %jx) entry (%jx, %jx)", a->domain, - (uintmax_t)prev->start, (uintmax_t)prev->end, - (uintmax_t)prev->free_after, - (uintmax_t)next->start, (uintmax_t)next->end, - (uintmax_t)a->entry->start, (uintmax_t)a->entry->end)); - - prev->free_after = a->entry->start - prev->end; - a->entry->free_after = next->start - a->entry->end; - found = dmar_gas_rb_insert(a->domain, a->entry); KASSERT(found, ("found dup %p start %jx size %jx", a->domain, (uintmax_t)a->entry->start, (uintmax_t)a->size)); a->entry->flags = DMAR_MAP_ENTRY_MAP; - - KASSERT(RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, - a->entry) == prev, - ("entry %p prev %p inserted prev %p", a->entry, prev, - RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, a->entry))); - KASSERT(RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, - a->entry) == next, - ("entry %p next %p inserted next %p", a->entry, next, - RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, a->entry))); } static int -dmar_gas_lowermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *prev) +dmar_gas_lowermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *entry) { - struct dmar_map_entry *l; - int ret; + struct dmar_map_entry *child; - if (prev->end < a->common->lowaddr) { - a->entry->start = roundup2(prev->end + DMAR_PAGE_SIZE, - a->common->alignment); - if (dmar_gas_match_one(a, prev, a->common->lowaddr)) { - dmar_gas_match_insert(a, prev); - return (0); - } + child = RB_RIGHT(entry, rb_entry); + if (child != NULL && entry->end < a->common->lowaddr && + dmar_gas_match_one(a, entry->end, child->first, + a->common->lowaddr)) { + dmar_gas_match_insert(a); + return (0); } - if (prev->free_down < a->size + a->offset + DMAR_PAGE_SIZE) + if (entry->free_down < a->size + a->offset + DMAR_PAGE_SIZE) return (ENOMEM); - l = RB_LEFT(prev, rb_entry); - if (l != NULL) { - ret = dmar_gas_lowermatch(a, l); - if (ret == 0) - return (0); + child = RB_LEFT(entry, rb_entry); + if (child != NULL && 0 == dmar_gas_lowermatch(a, child)) + return (0); + if (child != NULL && child->last < a->common->lowaddr && + dmar_gas_match_one(a, child->last, entry->start, + a->common->lowaddr)) { + dmar_gas_match_insert(a); + return (0); } - l = RB_RIGHT(prev, rb_entry); - if (l != NULL) - return (dmar_gas_lowermatch(a, l)); + child = RB_RIGHT(entry, rb_entry); + if (child != NULL && 0 == dmar_gas_lowermatch(a, child)) + return (0); return (ENOMEM); } static int -dmar_gas_uppermatch(struct dmar_gas_match_args *a) +dmar_gas_uppermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *entry) { - struct dmar_map_entry *next, *prev, find_entry; + struct dmar_map_entry *child; - find_entry.start = a->common->highaddr; - next = RB_NFIND(dmar_gas_entries_tree, &a->domain->rb_root, - &find_entry); - if (next == NULL) + if (entry->last < a->common->highaddr) return (ENOMEM); - prev = RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, next); - KASSERT(prev != NULL, ("no prev %p %jx", a->domain, - (uintmax_t)find_entry.start)); - for (;;) { - a->entry->start = prev->start + DMAR_PAGE_SIZE; - if (a->entry->start < 
a->common->highaddr) - a->entry->start = a->common->highaddr; - a->entry->start = roundup2(a->entry->start, - a->common->alignment); - if (dmar_gas_match_one(a, prev, a->domain->end)) { - dmar_gas_match_insert(a, prev); - return (0); - } - - /* - * XXXKIB. This falls back to linear iteration over - * the free space in the high region. But high - * regions are almost unused, the code should be - * enough to cover the case, although in the - * non-optimal way. - */ - prev = next; - next = RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, - prev); - KASSERT(next != NULL, ("no next %p %jx", a->domain, - (uintmax_t)find_entry.start)); - if (next->end >= a->domain->end) - return (ENOMEM); + child = RB_LEFT(entry, rb_entry); + if (child != NULL && 0 == dmar_gas_uppermatch(a, child)) + return (0); + if (child != NULL && child->last >= a->common->highaddr && + dmar_gas_match_one(a, child->last, entry->start, + a->domain->end)) { + dmar_gas_match_insert(a); + return (0); } + child = RB_RIGHT(entry, rb_entry); + if (child != NULL && entry->end >= a->common->highaddr && + dmar_gas_match_one(a, entry->end, child->first, + a->domain->end)) { + dmar_gas_match_insert(a); + return (0); + } + if (child != NULL && 0 == dmar_gas_uppermatch(a, child)) + return (0); + return (ENOMEM); } static int dmar_gas_find_space(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int flags, struct dmar_map_entry *entry) { struct dmar_gas_match_args a; int error; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT(entry->flags == 0, ("dirty entry %p %p", domain, entry)); KASSERT((size & DMAR_PAGE_MASK) == 0, ("size %jx", (uintmax_t)size)); a.domain = domain; a.size = size; a.offset = offset; a.common = common; a.gas_flags = flags; a.entry = entry; /* Handle lower region. */ if (common->lowaddr > 0) { error = dmar_gas_lowermatch(&a, RB_ROOT(&domain->rb_root)); if (error == 0) return (0); KASSERT(error == ENOMEM, ("error %d from dmar_gas_lowermatch", error)); } /* Handle upper region. */ if (common->highaddr >= domain->end) return (ENOMEM); - error = dmar_gas_uppermatch(&a); + error = dmar_gas_uppermatch(&a, RB_ROOT(&domain->rb_root)); KASSERT(error == ENOMEM, ("error %d from dmar_gas_uppermatch", error)); return (error); } static int dmar_gas_alloc_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int flags) { struct dmar_map_entry *next, *prev; bool found; DMAR_DOMAIN_ASSERT_LOCKED(domain); if ((entry->start & DMAR_PAGE_MASK) != 0 || (entry->end & DMAR_PAGE_MASK) != 0) return (EINVAL); if (entry->start >= entry->end) return (EINVAL); if (entry->end >= domain->end) return (EINVAL); next = RB_NFIND(dmar_gas_entries_tree, &domain->rb_root, entry); KASSERT(next != NULL, ("next must be non-null %p %jx", domain, (uintmax_t)entry->start)); prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, next); /* prev could be NULL */ /* * Adapt to broken BIOSes which specify overlapping RMRR * entries. * * XXXKIB: this does not handle a case when prev or next * entries are completely covered by the current one, which * extends both ways. 
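* For example (hypothetical addresses), an RMRR request for [0x1000, 0x5000) overlapping an existing RMRR entry [0x0, 0x2000) is clipped below to start at 0x2000 rather than failing with EBUSY.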
*/ if (prev != NULL && prev->end > entry->start && (prev->flags & DMAR_MAP_ENTRY_PLACE) == 0) { if ((flags & DMAR_GM_RMRR) == 0 || (prev->flags & DMAR_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->start = prev->end; } if (next->start < entry->end && (next->flags & DMAR_MAP_ENTRY_PLACE) == 0) { if ((flags & DMAR_GM_RMRR) == 0 || (next->flags & DMAR_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->end = next->start; } if (entry->end == entry->start) return (0); if (prev != NULL && prev->end > entry->start) { /* This assumes that prev is the placeholder entry. */ dmar_gas_rb_remove(domain, prev); prev = NULL; } if (next->start < entry->end) { dmar_gas_rb_remove(domain, next); next = NULL; } found = dmar_gas_rb_insert(domain, entry); KASSERT(found, ("found RMRR dup %p start %jx end %jx", domain, (uintmax_t)entry->start, (uintmax_t)entry->end)); if ((flags & DMAR_GM_RMRR) != 0) entry->flags = DMAR_MAP_ENTRY_RMRR; #ifdef INVARIANTS struct dmar_map_entry *ip, *in; ip = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); in = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); KASSERT(prev == NULL || ip == prev, ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)", entry, entry->start, entry->end, prev, prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end, ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end)); KASSERT(next == NULL || in == next, ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)", entry, entry->start, entry->end, next, next == NULL ? 0 : next->start, next == NULL ? 0 : next->end, in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end)); #endif return (0); } void dmar_gas_free_space(struct dmar_domain *domain, struct dmar_map_entry *entry) { DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_MAP, ("permanent entry %p %p", domain, entry)); dmar_gas_rb_remove(domain, entry); entry->flags &= ~DMAR_MAP_ENTRY_MAP; #ifdef INVARIANTS if (dmar_check_free) dmar_gas_check_free(domain); #endif } void dmar_gas_free_region(struct dmar_domain *domain, struct dmar_map_entry *entry) { struct dmar_map_entry *next, *prev; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_RMRR, ("non-RMRR entry %p %p", domain, entry)); prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_rb_remove(domain, entry); entry->flags &= ~DMAR_MAP_ENTRY_RMRR; if (prev == NULL) dmar_gas_rb_insert(domain, domain->first_place); if (next == NULL) dmar_gas_rb_insert(domain, domain->last_place); } int dmar_gas_map(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct dmar_map_entry **res) { struct dmar_map_entry *entry; int error; KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_CANSPLIT)) == 0, ("invalid flags 0x%x", flags)); entry = dmar_gas_alloc_entry(domain, (flags & DMAR_GM_CANWAIT) != 0 ? 
DMAR_PGF_WAITOK : 0); if (entry == NULL) return (ENOMEM); DMAR_DOMAIN_LOCK(domain); error = dmar_gas_find_space(domain, common, size, offset, flags, entry); if (error == ENOMEM) { DMAR_DOMAIN_UNLOCK(domain); dmar_gas_free_entry(domain, entry); return (error); } #ifdef INVARIANTS if (dmar_check_free) dmar_gas_check_free(domain); #endif KASSERT(error == 0, ("unexpected error %d from dmar_gas_find_entry", error)); KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx", (uintmax_t)entry->end, (uintmax_t)domain->end)); entry->flags |= eflags; DMAR_DOMAIN_UNLOCK(domain); error = domain_map_buf(domain, entry->start, entry->end - entry->start, ma, ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) | ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0), (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); if (error == ENOMEM) { dmar_domain_unload_entry(entry, true); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); *res = entry; return (0); } int dmar_gas_map_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma) { dmar_gaddr_t start; int error; KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain, entry, entry->flags)); KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_RMRR)) == 0, ("invalid flags 0x%x", flags)); start = entry->start; DMAR_DOMAIN_LOCK(domain); error = dmar_gas_alloc_region(domain, entry, flags); if (error != 0) { DMAR_DOMAIN_UNLOCK(domain); return (error); } entry->flags |= eflags; DMAR_DOMAIN_UNLOCK(domain); if (entry->end == entry->start) return (0); error = domain_map_buf(domain, entry->start, entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start), ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) | ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0), (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); if (error == ENOMEM) { dmar_domain_unload_entry(entry, false); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); return (0); } int dmar_gas_reserve_region(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t end) { struct dmar_map_entry *entry; int error; entry = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); entry->start = start; entry->end = end; DMAR_DOMAIN_LOCK(domain); error = dmar_gas_alloc_region(domain, entry, DMAR_GM_CANWAIT); if (error == 0) entry->flags |= DMAR_MAP_ENTRY_UNMAPPED; DMAR_DOMAIN_UNLOCK(domain); if (error != 0) dmar_gas_free_entry(domain, entry); return (error); } Index: projects/clang1000-import/tests/sys/kern/ptrace_test.c =================================================================== --- projects/clang1000-import/tests/sys/kern/ptrace_test.c (revision 357178) +++ projects/clang1000-import/tests/sys/kern/ptrace_test.c (revision 357179) @@ -1,4304 +1,4307 @@ /*- * Copyright (c) 2015 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #define _WANT_MIPS_REGNUM #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Architectures with a user-visible breakpoint(). */ #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) || defined(__riscv) || \ defined(__sparc64__) #define HAVE_BREAKPOINT #endif /* * Adjust PC to skip over a breakpoint when stopped for a breakpoint trap. */ #ifdef HAVE_BREAKPOINT #if defined(__aarch64__) #define SKIP_BREAK(reg) ((reg)->elr += 4) #elif defined(__amd64__) || defined(__i386__) #define SKIP_BREAK(reg) #elif defined(__arm__) #define SKIP_BREAK(reg) ((reg)->r_pc += 4) #elif defined(__mips__) #define SKIP_BREAK(reg) ((reg)->r_regs[PC] += 4) #elif defined(__riscv) #define SKIP_BREAK(reg) ((reg)->sepc += 4) #elif defined(__sparc64__) #define SKIP_BREAK(reg) do { \ (reg)->r_tpc = (reg)->r_tnpc + 4; \ (reg)->r_tnpc += 8; \ } while (0) #endif #endif /* * A variant of ATF_REQUIRE that is suitable for use in child * processes. This only works if the parent process is tripped up by * the early exit and fails some requirement itself. */ #define CHILD_REQUIRE(exp) do { \ if (!(exp)) \ child_fail_require(__FILE__, __LINE__, \ #exp " not met"); \ } while (0) static __dead2 void child_fail_require(const char *file, int line, const char *str) { char buf[128]; snprintf(buf, sizeof(buf), "%s:%d: %s\n", file, line, str); write(2, buf, strlen(buf)); _exit(32); } static void trace_me(void) { /* Attach the parent process as a tracer of this process. */ CHILD_REQUIRE(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); /* Trigger a stop. */ raise(SIGSTOP); } static void attach_child(pid_t pid) { pid_t wpid; int status; ATF_REQUIRE(ptrace(PT_ATTACH, pid, NULL, 0) == 0); wpid = waitpid(pid, &status, 0); ATF_REQUIRE(wpid == pid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); } static void wait_for_zombie(pid_t pid) { /* * Wait for a process to exit. This is kind of gross, but * there is not a better way. * * Prior to r325719, the kern.proc.pid.<pid> sysctl failed * with ESRCH. After that change, a valid struct kinfo_proc * is returned for zombies with ki_stat set to SZOMB. 
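* The loop below therefore treats a sysctl failure (required to be ESRCH) as "already reaped" and ki_stat == SZOMB as "zombie reached", polling in 5 ms steps.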
*/ for (;;) { struct kinfo_proc kp; size_t len; int mib[4]; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_PID; mib[3] = pid; len = sizeof(kp); if (sysctl(mib, nitems(mib), &kp, &len, NULL, 0) == -1) { ATF_REQUIRE(errno == ESRCH); break; } if (kp.ki_stat == SZOMB) break; usleep(5000); } } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_TRACE_ME. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_trace_me); ATF_TC_BODY(ptrace__parent_wait_after_trace_me, tc) { pid_t child, wpid; int status; ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ trace_me(); _exit(1); } /* Parent process. */ /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_ATTACH. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_attach); ATF_TC_BODY(ptrace__parent_wait_after_attach, tc) { pid_t child, wpid; int cpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for the parent to attach. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == 0); _exit(1); } close(cpipe[1]); /* Parent process. */ /* Attach to the child process. */ attach_child(child); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal the child to exit. */ close(cpipe[0]); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent process "sees" the exit of a debugged process only * after the debugger has seen it. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_child_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_child_debugger, tc) { pid_t child, debugger, wpid; int cpipe[2], dpipe[2], status; char c; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239399"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. 
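(One byte over dpipe; the parent blocks in read() on the other end until this write arrives.)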
*/ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == 0); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ close(dpipe[0]); /* Wait for the debugger. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that a parent process "sees" the exit of a debugged process * only after a non-direct-child debugger has seen it. In particular, * various wait() calls in the parent must avoid failing with ESRCH by * checking the parent's orphan list for the debuggee. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_unrelated_debugger, tc) { pid_t child, debugger, fpid, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger parent. */ /* * Fork again and drop the debugger parent so that the * debugger is not a child of the main parent. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(2); /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == sizeof(c)); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger parent process to exit. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* A WNOHANG wait here should see the non-exited child. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. 
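(One byte over cpipe unblocks the child's read(); the following read() == 0 observes EOF once the exiting child closes its end.)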
*/ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ ATF_REQUIRE(write(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Wait for the debugger. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == 0); close(dpipe[0]); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Make sure that we can collect the exit status of an orphaned process. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_exits_before_child); ATF_TC_BODY(ptrace__parent_exits_before_child, tc) { ssize_t n; int cpipe1[2], cpipe2[2], gcpipe[2], status; pid_t child, gchild; ATF_REQUIRE(pipe(cpipe1) == 0); ATF_REQUIRE(pipe(cpipe2) == 0); ATF_REQUIRE(pipe(gcpipe) == 0); ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { CHILD_REQUIRE((gchild = fork()) != -1); if (gchild == 0) { status = 1; do { n = read(gcpipe[0], &status, sizeof(status)); } while (n == -1 && errno == EINTR); _exit(status); } CHILD_REQUIRE(write(cpipe1[1], &gchild, sizeof(gchild)) == sizeof(gchild)); CHILD_REQUIRE(read(cpipe2[0], &status, sizeof(status)) == sizeof(status)); _exit(status); } ATF_REQUIRE(read(cpipe1[0], &gchild, sizeof(gchild)) == sizeof(gchild)); ATF_REQUIRE(ptrace(PT_ATTACH, gchild, NULL, 0) == 0); status = 0; ATF_REQUIRE(write(cpipe2[1], &status, sizeof(status)) == sizeof(status)); ATF_REQUIRE(waitpid(child, &status, 0) == child); ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); status = 0; ATF_REQUIRE(write(gcpipe[1], &status, sizeof(status)) == sizeof(status)); ATF_REQUIRE(waitpid(gchild, &status, 0) == gchild); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(ptrace(PT_DETACH, gchild, (caddr_t)1, 0) == 0); ATF_REQUIRE(waitpid(gchild, &status, 0) == gchild); ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); ATF_REQUIRE(close(cpipe1[0]) == 0); ATF_REQUIRE(close(cpipe1[1]) == 0); ATF_REQUIRE(close(cpipe2[0]) == 0); ATF_REQUIRE(close(cpipe2[1]) == 0); ATF_REQUIRE(close(gcpipe[0]) == 0); ATF_REQUIRE(close(gcpipe[1]) == 0); } /* * The parent process should always act the same regardless of how the * debugger is attached to it. */ static __dead2 void follow_fork_parent(bool use_vfork) { pid_t fpid, wpid; int status; if (use_vfork) CHILD_REQUIRE((fpid = vfork()) != -1); else CHILD_REQUIRE((fpid = fork()) != -1); if (fpid == 0) /* Child */ _exit(2); wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 2); _exit(1); } /* * Helper routine for follow fork tests. This waits for two stops * that report both "sides" of a fork. It returns the pid of the new * child process. */ static pid_t handle_fork_events(pid_t parent, struct ptrace_lwpinfo *ppl) { struct ptrace_lwpinfo pl; bool fork_reported[2]; pid_t child, wpid; int i, status; fork_reported[0] = false; fork_reported[1] = false; child = -1; /* * Each process should report a fork event. The parent should * report a PL_FLAG_FORKED event, and the child should report * a PL_FLAG_CHILD event. 
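* The two stops can arrive in either order, so the loop below keys on PL_FLAG_CHILD rather than on wait() ordering and cross-checks that both sides name the same child pid.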
*/ for (i = 0; i < 2; i++) { wpid = wait(&status); ATF_REQUIRE(wpid > 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != 0); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != (PL_FLAG_FORKED | PL_FLAG_CHILD)); if (pl.pl_flags & PL_FLAG_CHILD) { ATF_REQUIRE(wpid != parent); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(!fork_reported[1]); if (child == -1) child = wpid; else ATF_REQUIRE(child == wpid); if (ppl != NULL) ppl[1] = pl; fork_reported[1] = true; } else { ATF_REQUIRE(wpid == parent); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(!fork_reported[0]); if (child == -1) child = pl.pl_child_pid; else ATF_REQUIRE(child == pl.pl_child_pid); if (ppl != NULL) ppl[0] = pl; fork_reported[0] = true; } } return (child); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached); ATF_TC_BODY(ptrace__follow_fork_both_attached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached); ATF_TC_BODY(ptrace__follow_fork_child_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. 
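(The (caddr_t)1 address means "resume where the process stopped" and the 0 signal argument suppresses delivery of the pending SIGSTOP.)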
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the grandchild now, only the * child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached); ATF_TC_BODY(ptrace__follow_fork_parent_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. * * Even though the child process is detached, it is still a * child of the debugger, so it will still report its exit * after the grandchild. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void attach_fork_parent(int cpipe[2]) { pid_t fpid; close(cpipe[0]); /* Double-fork to disassociate from the debugger. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(3); /* Send the pid of the disassociated child to the debugger. */ fpid = getpid(); CHILD_REQUIRE(write(cpipe[1], &fpid, sizeof(fpid)) == sizeof(fpid)); /* Wait for the debugger to attach. */ CHILD_REQUIRE(read(cpipe[1], &fpid, sizeof(fpid)) == 0); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_both_attached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239397"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. 
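(attach_fork_parent() double-forks; its first child exits with status 3 while the real fork parent is reparented away from this process.)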
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The fork parent can't exit until the child reports status, * so the child should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_child_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239292"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the child now, only the fork * parent. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. In this test the parent * that forks is not a direct child of the debugger. 
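* Since the detached fork parent is not our child either, only the new child's exit should be reported to us below.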
*/ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_parent_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239425"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the fork parent now, only * the child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a child process does not see an unrelated debugger as its * parent but sees its original parent process. */ ATF_TC_WITHOUT_HEAD(ptrace__getppid); ATF_TC_BODY(ptrace__getppid, tc) { pid_t child, debugger, ppid, wpid; int cpipe[2], dpipe[2], status; char c; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/240510"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); /* Report the parent PID to the parent. */ ppid = getppid(); CHILD_REQUIRE(write(cpipe[1], &ppid, sizeof(ppid)) == sizeof(ppid)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for traced child to exit. */ wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); /* Read the parent PID from the child. */ ATF_REQUIRE(read(cpipe[0], &ppid, sizeof(ppid)) == sizeof(ppid)); close(cpipe[0]); ATF_REQUIRE(ppid == getpid()); /* Wait for the debugger. 
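(The debugger exits 0 only after collecting the traced child's exit status, which orders this process's observation of the exit after the debugger's.)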
*/ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via fork() reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_fork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_fork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_fork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via vfork() reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_vfork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_vfork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. 
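(handle_fork_events() returns the new child's pid and fills pl[0] with the parent-side and pl[1] with the child-side lwpinfo consumed by the checks below.)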
*/ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_vfork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * simple_thread(void *arg __unused) { pthread_exit(NULL); } static __dead2 void simple_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, simple_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(thread, NULL) == 0); exit(1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * thread reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_thread); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t mainlwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); mainlwp = pl.pl_lwpid; /* * Continue the child ignoring the SIGSTOP and tracing all * system call exits. */ ATF_REQUIRE(ptrace(PT_TO_SCX, fpid, (caddr_t)1, 0) != -1); /* * Wait for the new thread to arrive. pthread_create() might * invoke any number of system calls. For now we just wait * for the new thread to arrive and make sure it reports a * valid system call code. If ptrace grows thread event * reporting then this test can be made more precise. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl.pl_syscall_code != 0); if (pl.pl_lwpid != mainlwp) /* New thread seen. */ break; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Wait for the child to exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFEXITED(status)) break; ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected LWP events are reported for a child thread. 
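* With PT_LWP_EVENTS enabled, the expected sequence is PL_FLAG_BORN|PL_FLAG_SCX for the new LWP, PL_FLAG_EXITED|PL_FLAG_SCE at its death, and finally the process exit.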
*/ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events); ATF_TC_BODY(ptrace__lwp_events, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child thread's death. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED | PL_FLAG_SCE)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * exec_thread(void *arg __unused) { execl("/usr/bin/true", "true", NULL); exit(127); } static __dead2 void exec_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, exec_thread, NULL) == 0); for (;;) sleep(60); exit(1); } /* * Verify that the expected LWP events are reported for a multithreaded * process that calls execve(2). */ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events_exec); ATF_TC_BODY(ptrace__lwp_events_exec, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next event should be for the main thread's death due to * single threading from execve(). 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED)); ATF_REQUIRE(pl.pl_lwpid == lwps[0]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void handler(int sig __unused) { } static void signal_main(void) { signal(SIGINFO, handler); raise(SIGINFO); exit(0); } /* * Verify that the expected ptrace event is reported for a signal. */ ATF_TC_WITHOUT_HEAD(ptrace__siginfo); ATF_TC_BODY(ptrace__siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the SIGINFO. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINFO); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_event == PL_EVENT_SIGNAL); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_code == SI_LWP); ATF_REQUIRE(pl.pl_siginfo.si_pid == wpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_EXEC. */ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_disable); ATF_TC_BODY(ptrace__ptrace_exec_disable, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = 0; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_enable); ATF_TC_BODY(ptrace__ptrace_exec_enable, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = PTRACE_EXEC; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__event_mask); ATF_TC_BODY(ptrace__event_mask, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(0); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* PT_FOLLOW_FORK should toggle the state of PTRACE_FORK. */ ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_FORK); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_FORK)); /* PT_LWP_EVENTS should toggle the state of PTRACE_LWP. */ ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_LWP); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_LWP)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_VFORK. */ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork); ATF_TC_BODY(ptrace__ptrace_vfork, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* The first wait() should report the stop from SIGSTOP. 
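 *
 * [Editor's note: not part of the original file.  The
 * PT_GET_EVENT_MASK / PT_SET_EVENT_MASK pair used just below is the
 * usual read-modify-write idiom; PT_FOLLOW_FORK and PT_LWP_EVENTS
 * merely toggle single bits in the same mask.  A generic helper
 * (name illustrative) could read:
 *
 *	static int
 *	enable_ptrace_event(pid_t pid, int bit)
 *	{
 *		int events;
 *
 *		if (ptrace(PT_GET_EVENT_MASK, pid, (caddr_t)&events,
 *		    sizeof(events)) == -1)
 *			return (-1);
 *		events |= bit;
 *		return (ptrace(PT_SET_EVENT_MASK, pid, (caddr_t)&events,
 *		    sizeof(events)));
 *	}]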
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); /* The next event should report the end of the vfork. */ wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork_follow); ATF_TC_BODY(ptrace__ptrace_vfork_follow, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_FORK | PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORKED) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = waitpid(children[1], &status, 0); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* * The child should report its vfork() completion before it * exits. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl[0], sizeof(pl[0])) != -1); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped due to a breakpoint trap. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_breakpoint); ATF_TC_BODY(ptrace__PT_KILL_breakpoint, tc) { pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP.
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped inside of a system call. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_system_call); ATF_TC_BODY(ptrace__PT_KILL_system_call, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that no more events are reported after PT_KILL except for the * process exit when killing a multithreaded process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_threads); ATF_TC_BODY(ptrace__PT_KILL_threads, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t main_lwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != main_lwp); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. 
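 *
 * [Editor's note: not part of the original file.  The property all of
 * these PT_KILL tests check can be phrased as a single predicate;
 * assumes <stdbool.h>, and the helper name is illustrative:
 *
 *	static bool
 *	pt_kill_reports_only_sigkill(pid_t pid)
 *	{
 *		int status;
 *
 *		if (ptrace(PT_KILL, pid, 0, 0) == -1)
 *			return (false);
 *		if (waitpid(pid, &status, 0) != pid)
 *			return (false);
 *		return (WIFSIGNALED(status) &&
 *		    WTERMSIG(status) == SIGKILL);
 *	}
 *
 * No breakpoint, syscall, or queued-signal stop should be observed
 * between the PT_KILL request and that final SIGKILL status.]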
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * mask_usr1_thread(void *arg) { pthread_barrier_t *pbarrier; sigset_t sigmask; pbarrier = (pthread_barrier_t*)arg; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR1); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(pbarrier); for (;;) sleep(60); return (NULL); } /* * Verify that the SIGKILL from PT_KILL takes priority over other signals * and prevents spurious stops due to those other signals. */ ATF_TC(ptrace__PT_KILL_competing_signal); ATF_TC_HEAD(ptrace__PT_KILL_competing_signal, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_signal, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; struct sched_param sched_param; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); trace_me(); for (;;) sleep(60); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Send a signal that only the first thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not the SIGUSR signal). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the SIGKILL from PT_KILL takes priority over other stop events * and prevents spurious stops caused by those events. 
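 *
 * [Editor's note: not part of the original file.  Both "competing"
 * tests serialize their two threads by pinning the child to CPU 0 and
 * staggering SCHED_FIFO priorities.  The pinning step, extracted for
 * reference (name illustrative), is:
 *
 *	static int
 *	pin_self_to_cpu0(void)
 *	{
 *		cpuset_t mask;
 *		cpusetid_t setid;
 *
 *		CPU_ZERO(&mask);
 *		CPU_SET(0, &mask);
 *		if (cpuset(&setid) != 0)
 *			return (-1);
 *		return (cpuset_setaffinity(CPU_LEVEL_CPUSET,
 *		    CPU_WHICH_CPUSET, setid, sizeof(mask), &mask));
 *	}
 *
 * cpuset(2) creates a new set that the caller joins, so the affinity
 * restriction also covers every thread created afterwards.]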
*/ ATF_TC(ptrace__PT_KILL_competing_stop); ATF_TC_HEAD(ptrace__PT_KILL_competing_stop, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_stop, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; lwpid_t main_lwp; struct ptrace_lwpinfo pl; struct sched_param sched_param; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/220841"); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); /* Sync up with the test before doing the getpid(). */ raise(SIGSTOP); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * Continue until child is done with setup, which is indicated with * SIGSTOP. Ignore system calls in the meantime. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); if (WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); } else { ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); break; } ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); } /* Proceed, allowing main thread to hit syscall entry for getpid(). */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid == main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Prevent the main thread from hitting its syscall exit for now. */ ATF_REQUIRE(ptrace(PT_SUSPEND, main_lwp, 0, 0) == 0); /* * Proceed, allowing second thread to hit syscall exit for * pthread_barrier_wait(). 
*/ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid != main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Allow the main thread to try to finish its system call. */ ATF_REQUIRE(ptrace(PT_RESUME, main_lwp, 0, 0) == 0); /* * At this point, the main thread is in the middle of a system call and * has been resumed. The second thread has taken a SIGUSR2 which will * be replaced with a SIGKILL below. The main thread will get to run * first. It should notice the kill request (even though the signal * replacement occurred in the other thread) and exit accordingly. It * should not stop for the system call exit event. */ /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not a syscall exit). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); _exit(2); } /* * Verify that even if the signal queue is full for a child process, * a PT_KILL will kill the process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_KILL_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that when stopped at a system call entry, a signal can be * requested with PT_CONTINUE which will be delivered once the system * call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_counting_handler(int sig) { static int counter = 0; CHILD_REQUIRE(sig == SIGUSR1); counter++; if (counter == 2) _exit(2); } /* * Verify that, when continuing from a stop at system call entry and exit, * a signal can be requested from both stops, and both will be delivered when * the system call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The third wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. 
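 *
 * [Editor's note: not part of the original file.  The signal-injection
 * idiom used throughout these tests: the addr argument (caddr_t)1
 * means "resume where the process stopped", and the data argument
 * names a signal to deliver on resume, with 0 discarding the signal
 * that caused the stop.  In sketch form (name illustrative):
 *
 *	static int
 *	resume_with_signal(pid_t pid, int sig)
 *	{
 *		return (ptrace(PT_CONTINUE, pid, (caddr_t)1, sig));
 *	}
 *
 * resume_with_signal(pid, 0) swallows the stopping signal;
 * resume_with_signal(pid, SIGUSR1) queues a SIGUSR1 instead.]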
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that even if the signal queue is full for a child process, * a PT_CONTINUE with a signal will not result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status)) { ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { /* * The last wait() should report normal _exit from the * SIGUSR1 handler. */ ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static sem_t sigusr1_sem; static int got_usr1; static void sigusr1_sempost_handler(int sig __unused) { got_usr1++; CHILD_REQUIRE(sem_post(&sigusr1_sem) == 0); } /* * Verify that even if the signal queue is full for a child process, * and the signal is masked, a PT_CONTINUE with a signal will not * result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; int max_pending_per_proc; size_t len; int i; sigset_t sigmask; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive the SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
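 *
 * [Editor's note: not part of the original file.  The "full sigqueue"
 * tests saturate the per-process signal queue, whose size comes from
 * the sysctl read just below.  As a stand-alone helper (name
 * illustrative) the lookup is:
 *
 *	static int
 *	sigqueue_limit(void)
 *	{
 *		int max_pending;
 *		size_t len = sizeof(max_pending);
 *
 *		if (sysctlbyname("kern.sigqueue.max_pending_per_proc",
 *		    &max_pending, &len, NULL, 0) == -1)
 *			return (-1);
 *		return (max_pending);
 *	}]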
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* Collect and ignore all of the SIGUSR2. */ for (i = 0; i < max_pending_per_proc; ++i) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Now our PT_CONTINUE'd SIGUSR1 should cause a stop after unmask. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report exit after receiving SIGUSR1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that, after stopping due to a signal, that signal can be * replaced with another signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_change_sig); ATF_TC_BODY(ptrace__PT_CONTINUE_change_sig, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); sleep(20); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal without ptrace. */ ATF_REQUIRE(kill(fpid, SIGINT) == 0); /* The second wait() should report a SIGINT was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGINT); /* Continue the child process with a different signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTERM) == 0); /* * The last wait() should report having died due to the new * signal, SIGTERM. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTERM); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a signal can be passed through to the child even when there * was no true signal originally. Such cases arise when a SIGTRAP is * invented for, e.g., system call stops. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry, tc) { struct ptrace_lwpinfo pl; struct rlimit rl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* SIGTRAP expected to cause exit on syscall entry.
*/ rl.rlim_cur = rl.rlim_max = 0; ATF_REQUIRE(setrlimit(RLIMIT_CORE, &rl) == 0); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a SIGTRAP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTRAP) == 0); for (;;) { /* * The last wait() should report exit due to SIGTRAP. In the * meantime, catch and proceed past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTRAP); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * A mixed bag PT_CONTINUE with signal test. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_mix); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_mix, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue with the first SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send an ABRT without ptrace. */ ATF_REQUIRE(kill(fpid, SIGABRT) == 0); /* Continue normally. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGABRT. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue, replacing the SIGABRT with another SIGUSR1. 
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify a signal delivered by ptrace is noticed by kevent(2). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_kqueue, tc) { pid_t fpid, wpid; int status, kq, nevents; struct kevent kev; ATF_REQUIRE(signal(SIGUSR1, SIG_IGN) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE((kq = kqueue()) > 0); EV_SET(&kev, SIGUSR1, EVFILT_SIGNAL, EV_ADD, 0, 0, 0); CHILD_REQUIRE(kevent(kq, &kev, 1, NULL, 0, NULL) == 0); trace_me(); for (;;) { nevents = kevent(kq, NULL, 0, &kev, 1, NULL); if (nevents == -1 && errno == EINTR) continue; CHILD_REQUIRE(nevents > 0); CHILD_REQUIRE(kev.filter == EVFILT_SIGNAL); CHILD_REQUIRE(kev.ident == SIGUSR1); break; } exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue with the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* * The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * signal_thread(void *arg) { int err; sigset_t sigmask; pthread_barrier_t *pbarrier = (pthread_barrier_t*)arg; /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free our companion thread from the barrier. */ pthread_barrier_wait(pbarrier); /* * Swap ignore duties; the next SIGUSR1 should go to the * other thread. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(pbarrier); /* Wait until our companion has received its SIGUSR1. */ pthread_barrier_wait(pbarrier); return (NULL); } /* * Verify that a traced process with a blocked signal receives the * signal from kill() once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__killed_with_sigmask); ATF_TC_BODY(ptrace__killed_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now.
*/ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Send blocked SIGUSR1 which should cause a stop. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the kill(SIGUSR1) was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, allowing in the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a traced process with a blocked signal receives the * signal from PT_CONTINUE once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Continue the child replacing SIGSTOP with SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report the SIGUSR1 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report normal exit with code 1.
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that if ptrace stops due to a signal but continues with * a different signal, the new signal is routed to a thread * that can accept it, and that the thread is awakened by the signal * in a timely manner. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_thread_sigmask, tc) { pid_t fpid, wpid; int status, err; pthread_t t; sigset_t sigmask; pthread_barrier_t barrier; ATF_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(pthread_create(&t, NULL, signal_thread, (void*)&barrier) == 0); /* The other thread should receive the first SIGUSR1. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); /* Wait until other thread has received its SIGUSR1. */ pthread_barrier_wait(&barrier); /* * Swap ignore duties; the next SIGUSR1 should go to this * thread. */ CHILD_REQUIRE(pthread_sigmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(&barrier); /* * Sync up with test code; we're ready for the next SIGUSR1 * now. */ raise(SIGSTOP); /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free the other thread from the barrier. */ pthread_barrier_wait(&barrier); CHILD_REQUIRE(pthread_join(t, NULL) == 0); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * Send a signal without ptrace that either thread will accept (USR2, * in this case). */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The next wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1.
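 *
 * [Editor's note: not part of the original file.  The routing the test
 * above depends on: a process-directed signal may be delivered to any
 * thread that has it unblocked, so blocking it in one thread with
 * pthread_sigmask() steers delivery to the other.  Sketch (name
 * illustrative):
 *
 *	static int
 *	block_signal_in_this_thread(int sig)
 *	{
 *		sigset_t set;
 *
 *		sigemptyset(&set);
 *		sigaddset(&set, sig);
 *		return (pthread_sigmask(SIG_BLOCK, &set, NULL));
 *	}
 *
 * Unlike sigprocmask(), this changes only the calling thread's mask.]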
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * raise_sigstop_thread(void *arg __unused) { raise(SIGSTOP); return (NULL); } static void * sleep_thread(void *arg __unused) { sleep(60); return (NULL); } static void terminate_with_pending_sigstop(bool sigstop_from_main_thread) { pid_t fpid, wpid; int status, i; cpuset_t setmask; cpusetid_t setid; pthread_t t; /* * Become the reaper for this process tree. We need to be able to check * that both child and grandchild have died. */ ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL) == 0); fpid = fork(); ATF_REQUIRE(fpid >= 0); if (fpid == 0) { fpid = fork(); CHILD_REQUIRE(fpid >= 0); if (fpid == 0) { trace_me(); /* Pin to CPU 0 to serialize thread execution. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); if (sigstop_from_main_thread) { /* * We expect the SIGKILL sent when our parent * dies to be delivered to the new thread. * Raise the SIGSTOP in this thread so the * threads compete. */ CHILD_REQUIRE(pthread_create(&t, NULL, sleep_thread, NULL) == 0); raise(SIGSTOP); } else { /* * We expect the SIGKILL to be delivered to * this thread. After creating the new thread, * just get off the CPU so the other thread can * raise the SIGSTOP. */ CHILD_REQUIRE(pthread_create(&t, NULL, raise_sigstop_thread, NULL) == 0); sleep(60); } exit(0); } /* First stop is trace_me() immediately after fork. */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Second stop is from the raise(SIGSTOP). */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* * Terminate tracing process without detaching. Our child * should be killed. */ exit(0); } /* * We should get a normal exit from our immediate child and a SIGKILL * exit from our grandchild. The latter case is the interesting one. * Our grandchild should not have stopped due to the SIGSTOP that was * left dangling when its parent died. */ for (i = 0; i < 2; ++i) { wpid = wait(&status); if (wpid == fpid) ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); else ATF_REQUIRE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL); } } /* * These two tests ensure that if the tracing process exits without detaching * just after the child received a SIGSTOP, the child is cleanly killed and * doesn't go to sleep due to the SIGSTOP. The parent's death will send a * SIGKILL to the child. If the SIGKILL and the SIGSTOP are handled by * different threads, the SIGKILL must win. There are two variants of this * test, designed to catch the case where the SIGKILL is delivered to the * younger thread (the first test) and the case where the SIGKILL is delivered * to the older thread (the second test). This behavior has changed in the * past, so make no assumptions.
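 *
 * [Editor's note: not part of the original file.  These tests rely on
 * procctl(2) reaping so the test process can wait() directly on its
 * grandchild; the acquisition used by terminate_with_pending_sigstop()
 * above reduces to (helper name illustrative):
 *
 *	static int
 *	become_reaper(void)
 *	{
 *		return (procctl(P_PID, getpid(), PROC_REAP_ACQUIRE,
 *		    NULL));
 *	}
 *
 * The reaper role is dropped automatically when the process exits, or
 * explicitly with PROC_REAP_RELEASE.]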
*/ ATF_TC(ptrace__parent_terminate_with_pending_sigstop1); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop1, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop1, tc) { terminate_with_pending_sigstop(true); } ATF_TC(ptrace__parent_terminate_with_pending_sigstop2); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop2, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop2, tc) { terminate_with_pending_sigstop(false); } /* * Verify that after ptrace() discards a SIGKILL signal, the event mask * is not modified. */ ATF_TC_WITHOUT_HEAD(ptrace__event_mask_sigkill_discard); ATF_TC_BODY(ptrace__event_mask_sigkill_discard, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, event_mask, new_event_mask; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGSTOP); exit(0); } /* The first wait() should report the stop from trace_me(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Set several unobtrusive event bits. */ event_mask = PTRACE_EXEC | PTRACE_FORK | PTRACE_LWP; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, wpid, (caddr_t)&event_mask, sizeof(event_mask)) == 0); /* Send a SIGKILL without using ptrace. */ ATF_REQUIRE(kill(fpid, SIGKILL) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should be due to the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGKILL); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGKILL); /* Continue the child ignoring the SIGKILL. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Check the current event mask. It should not have changed. */ new_event_mask = 0; ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, wpid, (caddr_t)&new_event_mask, sizeof(new_event_mask)) == 0); ATF_REQUIRE(event_mask == new_event_mask); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * flock_thread(void *arg) { int fd; fd = *(int *)arg; (void)flock(fd, LOCK_EX); (void)flock(fd, LOCK_UN); return (NULL); } /* * Verify that PT_ATTACH will suspend threads sleeping in an SBDRY section. * We rely on the fact that the lockf implementation sets SBDRY before blocking * on a lock. This is a regression test for r318191. 
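 *
 * [Editor's note: not part of the original file.  The attach half of
 * the test below follows the standard pattern: PT_ATTACH sends a
 * SIGSTOP, and the debugger must collect that stop before issuing any
 * further requests.  Sketch (name illustrative):
 *
 *	static int
 *	attach_and_wait(pid_t pid)
 *	{
 *		int status;
 *
 *		if (ptrace(PT_ATTACH, pid, NULL, 0) == -1)
 *			return (-1);
 *		if (waitpid(pid, &status, 0) != pid ||
 *		    !WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP)
 *			return (-1);
 *		return (0);
 *	}
 *
 * The test body instead polls with WNOHANG so that it can bound how
 * long it is willing to wait for the stop.]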
*/ ATF_TC_WITHOUT_HEAD(ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TC_BODY(ptrace__PT_ATTACH_with_SBDRY_thread, tc) { pthread_barrier_t barrier; pthread_barrierattr_t battr; char tmpfile[64]; pid_t child, wpid; int error, fd, i, status; ATF_REQUIRE(pthread_barrierattr_init(&battr) == 0); ATF_REQUIRE(pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_SHARED) == 0); ATF_REQUIRE(pthread_barrier_init(&barrier, &battr, 2) == 0); (void)snprintf(tmpfile, sizeof(tmpfile), "./ptrace.XXXXXX"); fd = mkstemp(tmpfile); ATF_REQUIRE(fd >= 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { pthread_t t[2]; int cfd; error = pthread_barrier_wait(&barrier); if (error != 0 && error != PTHREAD_BARRIER_SERIAL_THREAD) _exit(1); cfd = open(tmpfile, O_RDONLY); if (cfd < 0) _exit(1); /* * We want at least two threads blocked on the file lock since * the SIGSTOP from PT_ATTACH may kick one of them out of * sleep. */ if (pthread_create(&t[0], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_create(&t[1], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_join(t[0], NULL) != 0) _exit(1); if (pthread_join(t[1], NULL) != 0) _exit(1); _exit(0); } ATF_REQUIRE(flock(fd, LOCK_EX) == 0); error = pthread_barrier_wait(&barrier); ATF_REQUIRE(error == 0 || error == PTHREAD_BARRIER_SERIAL_THREAD); /* * Give the child some time to block. Is there a better way to do this? */ sleep(1); /* * Attach and give the child 3 seconds to stop. */ ATF_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) == 0); for (i = 0; i < 3; i++) { wpid = waitpid(child, &status, WNOHANG); if (wpid == child && WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) break; sleep(1); } ATF_REQUIRE_MSG(i < 3, "failed to stop child process after PT_ATTACH"); ATF_REQUIRE(ptrace(PT_DETACH, child, NULL, 0) == 0); ATF_REQUIRE(flock(fd, LOCK_UN) == 0); ATF_REQUIRE(unlink(tmpfile) == 0); ATF_REQUIRE(close(fd) == 0); } static void sigusr1_step_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); raise(SIGABRT); } /* * Verify that PT_STEP with a signal invokes the signal before * stepping the next instruction (and that the next instruction is * stepped correctly). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_STEP_with_signal); ATF_TC_BODY(ptrace__PT_STEP_with_signal, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal(SIGUSR1, sigusr1_step_handler); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Step the child process inserting SIGUSR1. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next stop should report the SIGABRT in the signal handler. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue the child process discarding the signal. 
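The final ptrace() argument names the signal to deliver on resume: the PT_STEP above passed SIGUSR1 to inject one, while a zero, as used below, resumes with nothing pending:

	ptrace(PT_STEP, fpid, (caddr_t)1, SIGUSR1);	// step, delivering SIGUSR1 first
	ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0);	// resume, discarding the stop signal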
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report a trace trap from PT_STEP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that a SIGTRAP event with the TRAP_BRKPT code is reported * for a breakpoint trap. */ ATF_TC_WITHOUT_HEAD(ptrace__breakpoint_siginfo); ATF_TC_BODY(ptrace__breakpoint_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_BRKPT); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that a SIGTRAP event with the TRAP_TRACE code is reported * for a single-step trap from PT_STEP. */ ATF_TC_WITHOUT_HEAD(ptrace__step_siginfo); ATF_TC_BODY(ptrace__step_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Step the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a single-step trap. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child process. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. 
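The TRAP_BRKPT/TRAP_TRACE checks above are the portable way for a tracer to tell the two SIGTRAP flavors apart; a minimal decoding sketch:

	struct ptrace_lwpinfo pl;

	ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl));
	if ((pl.pl_flags & PL_FLAG_SI) != 0 && pl.pl_siginfo.si_signo == SIGTRAP) {
		switch (pl.pl_siginfo.si_code) {
		case TRAP_BRKPT:	// stopped at a breakpoint instruction
			break;
		case TRAP_TRACE:	// finished a PT_STEP single step
			break;
		}
	}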
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) static void * continue_thread(void *arg __unused) { breakpoint(); return (NULL); } static __dead2 void continue_thread_main(void) { pthread_t threads[2]; CHILD_REQUIRE(pthread_create(&threads[0], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_create(&threads[1], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(threads[0], NULL) == 0); CHILD_REQUIRE(pthread_join(threads[1], NULL) == 0); exit(1); } /* * Ensure that PT_CONTINUE clears the status of the thread that * triggered the stop even if a different thread's LWP was passed to * PT_CONTINUE. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_different_thread); ATF_TC_BODY(ptrace__PT_CONTINUE_different_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; bool hit_break[2]; struct reg reg; int i, j, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); continue_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One of the new threads should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); lwps[0] = pl.pl_lwpid; /* * Suspend this thread to ensure both threads are alive before * hitting the breakpoint. */ ATF_REQUIRE(ptrace(PT_SUSPEND, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second thread should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; /* Resume both threads waiting for breakpoint events. */ hit_break[0] = hit_break[1] = false; ATF_REQUIRE(ptrace(PT_RESUME, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One thread should report a breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); /* * Resume both threads but pass the other thread's LWPID to * PT_CONTINUE.
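These requests accept an LWP ID wherever a process ID is accepted, and continuing by any one LWP resumes the whole process; the per-LWP controls used above, for reference:

	ptrace(PT_SUSPEND, lwpid, NULL, 0);	// keep one LWP parked across continues
	ptrace(PT_RESUME, lwpid, NULL, 0);	// let it run on the next continue

What the steps below verify is that the stopped thread's breakpoint event is cleared even though the other thread's ID names the process.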
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, lwps[i ^ 1], (caddr_t)1, 0) == 0); /* * We will now get two thread exit events and one more breakpoint * event. */ for (j = 0; j < 3; j++) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; ATF_REQUIRE_MSG(lwps[i] != 0, "event for exited thread"); if (pl.pl_flags & PL_FLAG_EXITED) { ATF_REQUIRE_MSG(hit_break[i], "exited thread did not report breakpoint"); lwps[i] = 0; } else { ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); ATF_REQUIRE_MSG(!hit_break[i], "double breakpoint event"); hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); } ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Both threads should have exited. */ ATF_REQUIRE(lwps[0] == 0); ATF_REQUIRE(lwps[1] == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* * Verify that PT_LWPINFO doesn't return stale siginfo. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_LWPINFO_stale_siginfo); ATF_TC_BODY(ptrace__PT_LWPINFO_stale_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* * Continue the process ignoring the signal, but enabling * syscall traps. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * The next stop should report a system call entry from * exit(). PL_FLAG_SI should not be set. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) == 0); /* Disable syscall tracing and continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events &= ~PTRACE_SYSCALL; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * A simple test of PT_GET_SC_ARGS and PT_GET_SC_RET.
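The tracer-side pattern for these two requests, as a rough sketch (sizes and error checks simplified):

	struct ptrace_lwpinfo pl;
	struct ptrace_sc_ret psr;
	register_t args[8];

	ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl));
	if (pl.pl_flags & PL_FLAG_SCE)		// syscall entry: fetch the arguments
		ptrace(PT_GET_SC_ARGS, wpid, (caddr_t)args,
		    pl.pl_syscall_narg * sizeof(register_t));
	else if (pl.pl_flags & PL_FLAG_SCX) {	// syscall exit: fetch the result
		ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr));
		// psr.sr_error == 0 means success and sr_retval[] is valid;
		// otherwise it holds the errno (EBADF for the failing close() below).
	}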
*/ ATF_TC_WITHOUT_HEAD(ptrace__syscall_args); ATF_TC_BODY(ptrace__syscall_args, tc) { struct ptrace_lwpinfo pl; struct ptrace_sc_ret psr; pid_t fpid, wpid; register_t args[2]; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); kill(getpid(), 0); close(3); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* * Continue the process ignoring the signal, but enabling * syscall traps. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall entry from getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE(pl.pl_syscall_code == SYS_getpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall exit from getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); ATF_REQUIRE(pl.pl_syscall_code == SYS_getpid); ATF_REQUIRE(ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr)) != -1); ATF_REQUIRE(psr.sr_error == 0); ATF_REQUIRE(psr.sr_retval[0] == wpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall entry from kill(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE(pl.pl_syscall_code == SYS_kill); ATF_REQUIRE(pl.pl_syscall_narg == 2); ATF_REQUIRE(ptrace(PT_GET_SC_ARGS, wpid, (caddr_t)args, sizeof(args)) != -1); ATF_REQUIRE(args[0] == wpid); ATF_REQUIRE(args[1] == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall exit from kill(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); ATF_REQUIRE(pl.pl_syscall_code == SYS_kill); ATF_REQUIRE(ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr)) != -1); ATF_REQUIRE(psr.sr_error == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall entry from close(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE(pl.pl_syscall_code == SYS_close); ATF_REQUIRE(pl.pl_syscall_narg == 1); ATF_REQUIRE(ptrace(PT_GET_SC_ARGS, wpid, (caddr_t)args, sizeof(args)) != -1); ATF_REQUIRE(args[0] == 3); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall exit from close(). 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); ATF_REQUIRE(pl.pl_syscall_code == SYS_close); ATF_REQUIRE(ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr)) != -1); ATF_REQUIRE(psr.sr_error == EBADF); /* Disable syscall tracing and continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events &= ~PTRACE_SYSCALL; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that when the process is traced, it isn't reparented * to the init process when we close all process descriptors. */ ATF_TC(ptrace__proc_reparent); ATF_TC_HEAD(ptrace__proc_reparent, tc) { atf_tc_set_md_var(tc, "timeout", "2"); } ATF_TC_BODY(ptrace__proc_reparent, tc) { pid_t traced, debuger, wpid; int pd, status; traced = pdfork(&pd, 0); ATF_REQUIRE(traced >= 0); if (traced == 0) { raise(SIGSTOP); exit(0); } ATF_REQUIRE(pd >= 0); debuger = fork(); ATF_REQUIRE(debuger >= 0); if (debuger == 0) { /* The traced process is reparented to debuger. */ ATF_REQUIRE(ptrace(PT_ATTACH, traced, 0, 0) == 0); wpid = waitpid(traced, &status, 0); ATF_REQUIRE(wpid == traced); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(close(pd) == 0); ATF_REQUIRE(ptrace(PT_DETACH, traced, (caddr_t)1, 0) == 0); /* We closed pd so we should not have any child. */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); exit(0); } ATF_REQUIRE(close(pd) == 0); wpid = waitpid(debuger, &status, 0); ATF_REQUIRE(wpid == debuger); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* Check if we still have any child. */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Ensure that traced processes created with pdfork(2) are visible to * waitid(P_ALL). */ ATF_TC_WITHOUT_HEAD(ptrace__procdesc_wait_child); ATF_TC_BODY(ptrace__procdesc_wait_child, tc) { pid_t child, wpid; int pd, status; child = pdfork(&pd, 0); ATF_REQUIRE(child >= 0); if (child == 0) { trace_me(); (void)raise(SIGSTOP); exit(0); } wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* * If the process was created by pdfork, the exit status has to * be collected through the process descriptor. */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); ATF_REQUIRE(close(pd) != -1); } /* * Ensure that traced processes created with pdfork(2) are not visible * after returning to parent - waitid(P_ALL).
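A thumbnail of the process-descriptor model that both of these reparenting tests poke at (sketch; error handling elided):

	int pd;
	pid_t pid = pdfork(&pd, 0);	// like fork(), but yields a descriptor
	if (pid == 0)
		_exit(0);		// child runs as usual
	// The child is managed through pd (pdkill(), kqueue/poll for exit);
	// by design it is not returned by wait()/waitid(P_ALL), and closing
	// the last descriptor without PD_DAEMON normally kills it.
	close(pd);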
*/ ATF_TC_WITHOUT_HEAD(ptrace__procdesc_reparent_wait_child); ATF_TC_BODY(ptrace__procdesc_reparent_wait_child, tc) { pid_t traced, debuger, wpid; int pd, status; + if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) + atf_tc_skip("https://bugs.freebsd.org/243605"); + traced = pdfork(&pd, 0); ATF_REQUIRE(traced >= 0); if (traced == 0) { raise(SIGSTOP); exit(0); } ATF_REQUIRE(pd >= 0); debuger = fork(); ATF_REQUIRE(debuger >= 0); if (debuger == 0) { /* The traced process is reparented to debuger. */ ATF_REQUIRE(ptrace(PT_ATTACH, traced, 0, 0) == 0); wpid = waitpid(traced, &status, 0); ATF_REQUIRE(wpid == traced); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Allow process to die. */ ATF_REQUIRE(ptrace(PT_CONTINUE, traced, (caddr_t)1, 0) == 0); wpid = waitpid(traced, &status, 0); ATF_REQUIRE(wpid == traced); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* Reparent back to the original process. */ ATF_REQUIRE(close(pd) == 0); exit(0); } wpid = waitpid(debuger, &status, 0); ATF_REQUIRE(wpid == debuger); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* * We have a child, but it has a process descriptor, * so we should not be able to collect the process with wait(). */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); ATF_REQUIRE(close(pd) == 0); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_trace_me); ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_attach); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_child_debugger); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__parent_exits_before_child); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__getppid); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_fork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_vfork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_thread); ATF_TP_ADD_TC(tp, ptrace__lwp_events); ATF_TP_ADD_TC(tp, ptrace__lwp_events_exec); ATF_TP_ADD_TC(tp, ptrace__siginfo); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_disable); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_enable); ATF_TP_ADD_TC(tp, ptrace__event_mask); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork_follow); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__PT_KILL_breakpoint); #endif ATF_TP_ADD_TC(tp, ptrace__PT_KILL_system_call); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_threads); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_signal); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_stop); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_change_sig); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_mix); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TP_ADD_TC(tp, ptrace__killed_with_sigmask); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigmask);
ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop1); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop2); ATF_TP_ADD_TC(tp, ptrace__event_mask_sigkill_discard); ATF_TP_ADD_TC(tp, ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TP_ADD_TC(tp, ptrace__PT_STEP_with_signal); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__breakpoint_siginfo); #endif ATF_TP_ADD_TC(tp, ptrace__step_siginfo); #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_different_thread); #endif ATF_TP_ADD_TC(tp, ptrace__PT_LWPINFO_stale_siginfo); ATF_TP_ADD_TC(tp, ptrace__syscall_args); ATF_TP_ADD_TC(tp, ptrace__proc_reparent); ATF_TP_ADD_TC(tp, ptrace__procdesc_wait_child); ATF_TP_ADD_TC(tp, ptrace__procdesc_reparent_wait_child); return (atf_no_error()); } Index: projects/clang1000-import/tests/sys/netinet/Makefile =================================================================== --- projects/clang1000-import/tests/sys/netinet/Makefile (revision 357178) +++ projects/clang1000-import/tests/sys/netinet/Makefile (revision 357179) @@ -1,22 +1,24 @@ # $FreeBSD$ +PACKAGE= tests + TESTSDIR= ${TESTSBASE}/sys/netinet BINDIR= ${TESTSDIR} ATF_TESTS_C= ip_reass_test \ so_reuseport_lb_test \ socket_afinet ATF_TESTS_SH= fibs_test redirect PROGS= udp_dontroute tcp_user_cookie ${PACKAGE}FILES+= redirect.py ${PACKAGE}FILESMODE_redirect.py=0555 MAN= WARNS?= 6 .include Index: projects/clang1000-import/tools/build/Makefile =================================================================== --- projects/clang1000-import/tools/build/Makefile (revision 357178) +++ projects/clang1000-import/tools/build/Makefile (revision 357179) @@ -1,158 +1,176 @@ # $FreeBSD$ .PATH: ${.CURDIR}/../../include LIB= egacy SRC= -INCSGROUPS= INCS SYSINCS CASPERINC +INCSGROUPS= INCS SYSINCS CASPERINC UFSINCS FFSINCS MSDOSFSINCS DISKINCS INCS= SYSINCSDIR= ${INCLUDEDIR}/sys CASPERINCDIR= ${INCLUDEDIR}/casper +# Also add ufs/ffs/msdosfs/disk headers to allow building makefs as a bootstrap tool +UFSINCSDIR= ${INCLUDEDIR}/ufs/ufs +FFSINCSDIR= ${INCLUDEDIR}/ufs/ffs +MSDOSFSINCSDIR= ${INCLUDEDIR}/fs/msdosfs +DISKINCSDIR= ${INCLUDEDIR}/sys/disk BOOTSTRAPPING?= 0 _WITH_PWCACHEDB!= grep -c pwcache_groupdb /usr/include/grp.h || true .if ${_WITH_PWCACHEDB} == 0 .PATH: ${.CURDIR}/../../contrib/libc-pwcache CFLAGS+= -I${.CURDIR}/../../contrib/libc-pwcache \ -I${.CURDIR}/../../lib/libc/include SRCS+= pwcache.c .endif _WITH_STRSVIS!= grep -c strsvis /usr/include/vis.h || true .if ${_WITH_STRSVIS} == 0 .PATH: ${.CURDIR}/../../contrib/libc-vis SRCS+= vis.c CFLAGS+= -I${.CURDIR}/../../contrib/libc-vis \ -I${.CURDIR}/../../lib/libc/include .endif _WITH_REALLOCARRAY!= grep -c reallocarray /usr/include/stdlib.h || true .if ${_WITH_REALLOCARRAY} == 0 .PATH: ${.CURDIR}/../../lib/libc/stdlib INCS+= stdlib.h SRCS+= reallocarray.c CFLAGS+= -I${.CURDIR}/../../lib/libc/include .endif _WITH_UTIMENS!= grep -c utimensat /usr/include/sys/stat.h || true .if ${_WITH_UTIMENS} == 0 SYSINCS+= stat.h SRCS+= futimens.c utimensat.c .endif _WITH_EXPLICIT_BZERO!= grep -c explicit_bzero /usr/include/strings.h || true .if ${_WITH_EXPLICIT_BZERO} == 0 .PATH: ${SRCTOP}/sys/libkern INCS+= strings.h SRCS+= explicit_bzero.c .endif .if exists(/usr/include/capsicum_helpers.h) _WITH_CAPH_ENTER!= grep -c caph_enter /usr/include/capsicum_helpers.h || true _WITH_CAPH_RIGHTS_LIMIT!= grep -c caph_rights_limit /usr/include/capsicum_helpers.h || true .endif .if !defined(_WITH_CAPH_ENTER) || 
${_WITH_CAPH_ENTER} == 0 || ${_WITH_CAPH_RIGHTS_LIMIT} == 0 .PATH: ${SRCTOP}/lib/libcapsicum INCS+= capsicum_helpers.h .PATH: ${SRCTOP}/lib/libcasper/libcasper INCS+= libcasper.h .endif CASPERINC+= ${SRCTOP}/lib/libcasper/services/cap_fileargs/cap_fileargs.h .if empty(SRCS) SRCS= dummy.c .endif .if defined(CROSS_BUILD_TESTING) SUBDIR= cross-build .endif + +# To allow bootstrapping makefs on FreeBSD 11 or non-FreeBSD systems: +UFSINCS+= ${SRCTOP}/sys/ufs/ufs/dinode.h +UFSINCS+= ${SRCTOP}/sys/ufs/ufs/dir.h +FFSINCS+= ${SRCTOP}/sys/ufs/ffs/fs.h + +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/bootsect.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/bpb.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/denode.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/direntry.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/fat.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/msdosfsmount.h +DISKINCS+= ${SRCTOP}/sys/sys/disk/bsd.h # Needed to build config (since it uses libnv) SYSINCS+= ${SRCTOP}/sys/sys/nv.h ${SRCTOP}/sys/sys/cnv.h \ ${SRCTOP}/sys/sys/dnv.h # We want to run the build with only ${WORLDTMP} in $PATH to ensure we don't # accidentally run tools that are incompatible but happen to be in $PATH. # This is especially important when building on Linux/MacOS where many of the # programs used during the build accept different flags or generate different # output. On those platforms we only symlink the tools known to be compatible # (e.g. basic utilities such as mkdir) into ${WORLDTMP} and build all others # from the FreeBSD sources during the bootstrap-tools stage. # basic commands: It is fine to use the host version for all of these even on # Linux/MacOS since we only use flags that are supported by all of them. _host_tools_to_symlink= basename bzip2 bunzip2 chmod chown cmp comm cp date \ dirname echo env false find fmt gzip gunzip head hostname id ln ls \ mkdir mv nice patch rm realpath sh sleep stat tee touch tr true uname \ uniq wc which # We also need a symlink to the absolute path to the make binary used for # the toplevel makefile. This is not necessarily the same as `which make` # since e.g. on Linux and MacOS that will be GNU make. _make_abs!= which "${MAKE}" _host_abs_tools_to_symlink= ${_make_abs}:make ${_make_abs}:bmake host-symlinks: @echo "Linking host tools into ${DESTDIR}/bin" .for _tool in ${_host_tools_to_symlink} @if [ ! -e "${DESTDIR}/bin/${_tool}" ]; then \ source_path=`which ${_tool}`; \ if [ ! -e "$${source_path}" ] ; then \ echo "Cannot find host tool '${_tool}'"; false; \ fi; \ ln -sfnv "$${source_path}" "${DESTDIR}/bin/${_tool}"; \ fi .endfor .for _tool in ${_host_abs_tools_to_symlink} @source_path="${_tool:S/:/ /:[1]}"; \ target_path="${DESTDIR}/bin/${_tool:S/:/ /:[2]}"; \ if [ ! -e "$${target_path}" ] ; then \ if [ ! -e "$${source_path}" ] ; then \ echo "Host tool '${src_path}' is missing"; false; \ fi; \ ln -sfnv "$${source_path}" "$${target_path}"; \ fi .endfor .if exists(/usr/libexec/flua) ln -sf /usr/libexec/flua ${DESTDIR}/usr/libexec/flua .endif # Create all the directories that are needed during the legacy, bootstrap-tools # and cross-tools stages. We do this here using mkdir since mtree may not exist # yet (this happens if we are crossbuilding from Linux/Mac). 
INSTALLDIR_LIST= \ bin \ lib/casper \ lib/geom \ usr/include/casper \ usr/include/private/zstd \ usr/lib \ usr/libexec installdirs: mkdir -p ${INSTALLDIR_LIST:S,^,${DESTDIR}/,} # Link usr/bin, sbin, and usr/sbin to bin so that it doesn't matter whether a # bootstrap tool was added to WORLDTMP with a symlink or by building it in the # bootstrap-tools phase. We could also override BINDIR when building bootstrap # tools but adding the symlinks is easier and means all tools are also # in the directory that they are installed to normally. .for _dir in sbin usr/sbin usr/bin # delete existing directories from before r340157 @if [ ! -L ${DESTDIR}/${_dir} ]; then \ echo "removing old non-symlink ${DESTDIR}/${_dir}"; \ rm -rf "${DESTDIR}/${_dir}"; \ fi .endfor ln -sfn bin ${DESTDIR}/sbin ln -sfn ../bin ${DESTDIR}/usr/bin ln -sfn ../bin ${DESTDIR}/usr/sbin .for _group in ${INCSGROUPS:NINCS} mkdir -p "${DESTDIR}/${${_group}DIR}" .endfor .include Index: projects/clang1000-import/usr.bin/xohtml/xohtml.sh =================================================================== --- projects/clang1000-import/usr.bin/xohtml/xohtml.sh (revision 357178) +++ projects/clang1000-import/usr.bin/xohtml/xohtml.sh (revision 357179) @@ -1,87 +1,87 @@ #!/bin/sh # $FreeBSD$ #!/bin/sh # # Copyright (c) 2014, Juniper Networks, Inc. # All rights reserved. # This SOFTWARE is licensed under the LICENSE provided in the # ../Copyright file. By downloading, installing, copying, or otherwise # using the SOFTWARE, you agree to be bound by the terms of that # LICENSE. # Phil Shafer, July 2014 # BASE=/usr/share/libxo -VERSION=1.3.1 +VERSION=1.4.0 CMD=cat DONE= WEB=http://juniper.github.io/libxo/${VERSION}/xohtml do_help () { echo "xohtml: wrap libxo-enabled output in HTML" echo "Usage: xohtml [options] [command [arguments]]" echo "Valid options are:" echo " -b | --base " echo " -c | --command " echo " -f | --file " exit 1 } while [ -z "$DONE" -a ! -z "$1" ]; do case "$1" in -b|--base) shift; BASE="$1"; shift; ;; -c|--command) shift; CMD="$1"; shift; ;; -f|--file) shift; FILE="$1"; shift; exec > "$FILE"; ;; -w|--web) shift; BASE="${WEB}"; ;; -*) do_help ;; *) DONE=1; XX=$1; shift; CMD="$XX --libxo=html $@" ;; esac done if [ "$CMD" = "cat" -a -t 0 ]; then do_help fi echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' $CMD echo '' echo '' exit 0 Index: projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_denode.c =================================================================== --- projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_denode.c (revision 357178) +++ projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_denode.c (revision 357179) @@ -1,382 +1,378 @@ /* $NetBSD: msdosfs_denode.c,v 1.7 2015/03/29 05:52:59 agc Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include #include #include "ffs/buf.h" #include #include #include #include #include #include "makefs.h" #include "msdos.h" /* * If deget() succeeds it returns with the gotten denode locked(). * * pmp - address of msdosfsmount structure of the filesystem containing * the denode of interest. The pm_dev field and the address of * the msdosfsmount structure are used. * dirclust - which cluster bp contains, if dirclust is 0 (root directory) * diroffset is relative to the beginning of the root directory, * otherwise it is cluster relative. * diroffset - offset past begin of cluster of denode we want * depp - returns the address of the gotten denode. */ int deget(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset, struct denode **depp) { int error; uint64_t inode; struct direntry *direntptr; struct denode *ldep; struct buf *bp; MSDOSFS_DPRINTF(("deget(pmp %p, dirclust %lu, diroffset %lx, depp %p)\n", pmp, dirclust, diroffset, depp)); /* * On FAT32 filesystems, root is a (more or less) normal * directory */ if (FAT32(pmp) && dirclust == MSDOSFSROOT) dirclust = pmp->pm_rootdirblk; inode = (uint64_t)pmp->pm_bpcluster * dirclust + diroffset; ldep = ecalloc(1, sizeof(*ldep)); ldep->de_vnode = NULL; ldep->de_flag = 0; ldep->de_dirclust = dirclust; ldep->de_diroffset = diroffset; ldep->de_inode = inode; ldep->de_pmp = pmp; ldep->de_refcnt = 1; fc_purge(ldep, 0); /* init the FAT cache for this denode */ /* * Copy the directory entry into the denode area of the vnode. */ if ((dirclust == MSDOSFSROOT || (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)) && diroffset == MSDOSFSROOT_OFS) { /* * Directory entry for the root directory. There isn't one, * so we manufacture one. We should probably rummage * through the root directory and find a label entry (if it * exists), and then use the time and date from that entry * as the time and date for the root denode. 
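The fallback below just stamps the FAT epoch: the date word packs year-since-1980, month, and day into 7, 4, and 5 bits, so

	// (0 << DD_YEAR_SHIFT) | (1 << DD_MONTH_SHIFT) | (1 << DD_DAY_SHIFT)
	// == (0 << 9) | (1 << 5) | (1 << 0) == 0x0021, i.e. 1980-01-01

which is the earliest date FAT can represent.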
*/ ldep->de_vnode = (struct vnode *)-1; ldep->de_Attributes = ATTR_DIRECTORY; ldep->de_LowerCase = 0; if (FAT32(pmp)) ldep->de_StartCluster = pmp->pm_rootdirblk; /* de_FileSize will be filled in further down */ else { ldep->de_StartCluster = MSDOSFSROOT; ldep->de_FileSize = pmp->pm_rootdirsize * DEV_BSIZE; } /* * fill in time and date so that dos2unixtime() doesn't * spit up when called from msdosfs_getattr() with root * denode */ ldep->de_CHun = 0; ldep->de_CTime = 0x0000; /* 00:00:00 */ ldep->de_CDate = (0 << DD_YEAR_SHIFT) | (1 << DD_MONTH_SHIFT) | (1 << DD_DAY_SHIFT); /* Jan 1, 1980 */ ldep->de_ADate = ldep->de_CDate; ldep->de_MTime = ldep->de_CTime; ldep->de_MDate = ldep->de_CDate; /* leave the other fields as garbage */ } else { error = readep(pmp, dirclust, diroffset, &bp, &direntptr); if (error) { ldep->de_Name[0] = SLOT_DELETED; *depp = NULL; return (error); } (void)DE_INTERNALIZE(ldep, direntptr); brelse(bp); } /* * Fill in a few fields of the vnode and finish filling in the * denode. Then return the address of the found denode. */ if (ldep->de_Attributes & ATTR_DIRECTORY) { /* * Since DOS directory entries that describe directories * have 0 in the filesize field, we take this opportunity * to find out the length of the directory and plug it into * the denode structure. */ u_long size; /* * XXX it sometimes happens that the "." entry has cluster * number 0 when it shouldn't. Use the actual cluster number * instead of what is written in directory entry. */ if (diroffset == 0 && ldep->de_StartCluster != dirclust) { MSDOSFS_DPRINTF(("deget(): \".\" entry at clust %lu != %lu\n", dirclust, ldep->de_StartCluster)); ldep->de_StartCluster = dirclust; } if (ldep->de_StartCluster != MSDOSFSROOT) { error = pcbmap(ldep, 0xffff, 0, &size, 0); if (error == E2BIG) { ldep->de_FileSize = de_cn2off(pmp, size); error = 0; } else { MSDOSFS_DPRINTF(("deget(): pcbmap returned %d\n", error)); } } } *depp = ldep; return (0); } /* * Truncate the file described by dep to the length specified by length. */ int detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred) { int error; int allerror; u_long eofentry; u_long chaintofree; daddr_t bn; int boff; int isadir = dep->de_Attributes & ATTR_DIRECTORY; struct buf *bp; struct msdosfsmount *pmp = dep->de_pmp; MSDOSFS_DPRINTF(("detrunc(): file %s, length %lu, flags %x\n", dep->de_Name, length, flags)); /* * Disallow attempts to truncate the root directory since it is of * fixed size. That's just the way dos filesystems are. We use * the VROOT bit in the vnode because checking for the directory * bit and a startcluster of 0 in the denode is not adequate to * recognize the root directory at this point in a file or * directory's life. */ if (dep->de_vnode != NULL && !FAT32(pmp)) { MSDOSFS_DPRINTF(("detrunc(): can't truncate root directory, " "clust %ld, offset %ld\n", dep->de_dirclust, dep->de_diroffset)); return (EINVAL); } if (dep->de_FileSize < length) return deextend(dep, length, cred); /* * If the desired length is 0 then remember the starting cluster of * the file and set the StartCluster field in the directory entry * to 0. If the desired length is not zero, then get the number of * the last cluster in the shortened file. Then get the number of * the first cluster in the part of the file that is to be freed. * Then set the next cluster pointer in the last cluster of the * file to CLUST_EOFE. 
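Concretely, assuming hypothetical 4096-byte clusters: a 10000-byte file occupies three clusters, so truncating it to 5000 bytes keeps clusters 0 and 1; pcbmap() is asked for file-relative cluster de_clcount(pmp, 5000) - 1 == 1, and the chain is cut there:

	// Mark the new last cluster end-of-file, remembering its old
	// successor so the tail can be freed afterwards.
	fatentry(FAT_GET_AND_SET, pmp, eofentry, &chaintofree, CLUST_EOFE);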
*/ if (length == 0) { chaintofree = dep->de_StartCluster; dep->de_StartCluster = 0; eofentry = ~0; } else { error = pcbmap(dep, de_clcount(pmp, length) - 1, 0, &eofentry, 0); if (error) { MSDOSFS_DPRINTF(("detrunc(): pcbmap fails %d\n", error)); return (error); } } fc_purge(dep, de_clcount(pmp, length)); /* * If the new length is not a multiple of the cluster size then we * must zero the tail end of the new last cluster in case it * becomes part of the file again because of a seek. */ if ((boff = length & pmp->pm_crbomask) != 0) { if (isadir) { bn = cntobn(pmp, eofentry); error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, &bp); if (error) { brelse(bp); MSDOSFS_DPRINTF(("detrunc(): bread fails %d\n", error)); return (error); } memset(bp->b_data + boff, 0, pmp->pm_bpcluster - boff); - if (flags & IO_SYNC) bwrite(bp); - else - bdwrite(bp); } } /* * Write out the updated directory entry. Even if the update fails * we free the trailing clusters. */ dep->de_FileSize = length; if (!isadir) dep->de_flag |= DE_UPDATE|DE_MODIFIED; MSDOSFS_DPRINTF(("detrunc(): allerror %d, eofentry %lu\n", allerror, eofentry)); /* * If we need to break the cluster chain for the file then do it * now. */ if (eofentry != ~0) { error = fatentry(FAT_GET_AND_SET, pmp, eofentry, &chaintofree, CLUST_EOFE); if (error) { MSDOSFS_DPRINTF(("detrunc(): fatentry errors %d\n", error)); return (error); } fc_setcache(dep, FC_LASTFC, de_cluster(pmp, length - 1), eofentry); } /* * Now free the clusters removed from the file because of the * truncation. */ if (chaintofree != 0 && !MSDOSFSEOF(pmp, chaintofree)) freeclusterchain(pmp, chaintofree); return (allerror); } /* * Extend the file described by dep to length specified by length. */ int deextend(struct denode *dep, u_long length, struct ucred *cred) { struct msdosfsmount *pmp = dep->de_pmp; u_long count; int error; /* * The root of a DOS filesystem cannot be extended. */ if (dep->de_vnode != NULL && !FAT32(pmp)) return (EINVAL); /* * Directories cannot be extended. */ if (dep->de_Attributes & ATTR_DIRECTORY) return (EISDIR); if (length <= dep->de_FileSize) return (E2BIG); /* * Compute the number of clusters to allocate. */ count = de_clcount(pmp, length) - de_clcount(pmp, dep->de_FileSize); if (count > 0) { if (count > pmp->pm_freeclustercount) return (ENOSPC); error = extendfile(dep, count, NULL, NULL, DE_CLEAR); if (error) { /* truncate the added clusters away again */ (void) detrunc(dep, dep->de_FileSize, 0, cred); return (error); } } /* * Zero extend file range; ubc_zerorange() uses ubc_alloc() and a * memset(); we set the write size so ubc won't read in file data that * is zero'd later. */ dep->de_FileSize = length; dep->de_flag |= DE_UPDATE | DE_MODIFIED; return 0; } Index: projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_vnops.c =================================================================== --- projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_vnops.c (revision 357178) +++ projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_vnops.c (revision 357179) @@ -1,641 +1,645 @@ /* $NetBSD: msdosfs_vnops.c,v 1.19 2017/04/13 17:10:12 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "ffs/buf.h" #include #include "msdos/direntry.h" #include #include #include #include "makefs.h" #include "msdos.h" /* * Some general notes: * * In the ufs filesystem the inodes, superblocks, and indirect blocks are * read/written using the vnode for the filesystem. Blocks that represent * the contents of a file are read/written using the vnode for the file * (including directories when they are read/written as files). This * presents problems for the dos filesystem because data that should be in * an inode (if dos had them) resides in the directory itself. Since we * must update directory entries without the benefit of having the vnode * for the directory we must use the vnode for the filesystem. This means * that when a directory is actually read/written (via read, write, or * readdir, or seek) we must use the vnode for the filesystem instead of * the vnode for the directory as would happen in ufs. This is to insure we * retrieve the correct block from the buffer cache since the hash value is * based upon the vnode address and the desired block number. 
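Timestamps are another translation handled here: unix2fattime() below packs a struct timespec into the two 16-bit FAT words (date: 7-bit year-since-1980, 4-bit month, 5-bit day; time: 5-bit hour, 6-bit minute, 5-bit half-seconds). A sketch of the inverse decoding, as a hypothetical helper for reference only:

	static void
	fat2tm(uint16_t dd, uint16_t dt, struct tm *tm)
	{
		tm->tm_year = ((dd >> 9) & 0x7f) + 80;	// struct tm counts from 1900
		tm->tm_mon = ((dd >> 5) & 0x0f) - 1;	// struct tm months are 0-based
		tm->tm_mday = dd & 0x1f;
		tm->tm_hour = (dt >> 11) & 0x1f;
		tm->tm_min = (dt >> 5) & 0x3f;
		tm->tm_sec = (dt & 0x1f) << 1;		// FAT stores 2-second units
	}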
*/ static int msdosfs_wfile(const char *, struct denode *, fsnode *); static void unix2fattime(const struct timespec *tsp, uint16_t *ddp, uint16_t *dtp); static void msdosfs_times(struct denode *dep, const struct stat *st) { if (stampst.st_ino) st = &stampst; +#ifdef HAVE_STRUCT_STAT_BIRTHTIME unix2fattime(&st->st_birthtim, &dep->de_CDate, &dep->de_CTime); +#else + unix2fattime(&st->st_ctim, &dep->de_CDate, &dep->de_CTime); +#endif unix2fattime(&st->st_atim, &dep->de_ADate, NULL); unix2fattime(&st->st_mtim, &dep->de_MDate, &dep->de_MTime); } static void unix2fattime(const struct timespec *tsp, uint16_t *ddp, uint16_t *dtp) { time_t t1; struct tm lt = {0}; t1 = tsp->tv_sec; localtime_r(&t1, <); unsigned long fat_time = ((lt.tm_year - 80) << 25) | ((lt.tm_mon + 1) << 21) | (lt.tm_mday << 16) | (lt.tm_hour << 11) | (lt.tm_min << 5) | (lt.tm_sec >> 1); if (ddp != NULL) *ddp = (uint16_t)(fat_time >> 16); if (dtp != NULL) *dtp = (uint16_t)fat_time; } /* * When we search a directory the blocks containing directory entries are * read and examined. The directory entries contain information that would * normally be in the inode of a unix filesystem. This means that some of * a directory's contents may also be in memory resident denodes (sort of * an inode). This can cause problems if we are searching while some other * process is modifying a directory. To prevent one process from accessing * incompletely modified directory information we depend upon being the * sole owner of a directory block. bread/brelse provide this service. * This being the case, when a process modifies a directory it must first * acquire the disk block that contains the directory entry to be modified. * Then update the disk block and the denode, and then write the disk block * out to disk. This way disk blocks containing directory entries and in * memory denode's will be in synch. */ static int msdosfs_findslot(struct denode *dp, struct componentname *cnp) { daddr_t bn; int error; int slotcount; int slotoffset = 0; int frcn; u_long cluster; int blkoff; u_int diroff; int blsize; struct msdosfsmount *pmp; struct buf *bp = 0; struct direntry *dep; u_char dosfilename[12]; int wincnt = 1; int chksum = -1, chksum_ok; int olddos = 1; pmp = dp->de_pmp; switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename, cnp->cn_namelen, 0)) { case 0: return (EINVAL); case 1: break; case 2: wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, cnp->cn_namelen) + 1; break; case 3: olddos = 0; wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, cnp->cn_namelen) + 1; break; } if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) wincnt = 1; /* * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ slotcount = 0; MSDOSFS_DPRINTF(("%s(): dos filename: %s\n", __func__, dosfilename)); /* * Search the directory pointed at by vdp for the name pointed at * by cnp->cn_nameptr. */ /* * The outer loop ranges over the clusters that make up the * directory. Note that the root directory is different from all * other directories. It has a fixed number of blocks that are not * part of the pool of allocatable clusters. So, we treat it a * little differently. The root directory starts at "cluster" 0. 
*/ diroff = 0; for (frcn = 0; diroff < dp->de_FileSize; frcn++) { if ((error = pcbmap(dp, frcn, &bn, &cluster, &blsize)) != 0) { if (error == E2BIG) break; return (error); } error = bread(pmp->pm_devvp, bn, blsize, 0, &bp); if (error) { return (error); } for (blkoff = 0; blkoff < blsize; blkoff += sizeof(struct direntry), diroff += sizeof(struct direntry)) { dep = (struct direntry *)(bp->b_data + blkoff); /* * If the slot is empty and we are still looking * for an empty then remember this one. If the * slot is not empty then check to see if it * matches what we are looking for. If the slot * has never been filled with anything, then the * remainder of the directory has never been used, * so there is no point in searching it. */ if (dep->deName[0] == SLOT_EMPTY || dep->deName[0] == SLOT_DELETED) { /* * Drop memory of previous long matches */ chksum = -1; if (slotcount < wincnt) { slotcount++; slotoffset = diroff; } if (dep->deName[0] == SLOT_EMPTY) { brelse(bp); goto notfound; } } else { /* * If there wasn't enough space for our * winentries, forget about the empty space */ if (slotcount < wincnt) slotcount = 0; /* * Check for Win95 long filename entry */ if (dep->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; chksum = winChkName( (const u_char *)cnp->cn_nameptr, cnp->cn_namelen, (struct winentry *)dep, chksum); continue; } /* * Ignore volume labels (anywhere, not just * the root directory). */ if (dep->deAttributes & ATTR_VOLUME) { chksum = -1; continue; } /* * Check for a checksum or name match */ chksum_ok = (chksum == winChksum(dep->deName)); if (!chksum_ok && (!olddos || memcmp(dosfilename, dep->deName, 11))) { chksum = -1; continue; } MSDOSFS_DPRINTF(("%s(): match blkoff %d, diroff %u\n", __func__, blkoff, diroff)); /* * Remember where this directory * entry came from for whoever did * this lookup. */ dp->de_fndoffset = diroff; dp->de_fndcnt = 0; return EEXIST; } } /* for (blkoff = 0; .... */ /* * Release the buffer holding the directory cluster just * searched. */ brelse(bp); } /* for (frcn = 0; ; frcn++) */ notfound: /* * We hold no disk buffers at this point. */ /* * If we get here we didn't find the entry we were looking for. But * that's ok if we are creating or renaming and are at the end of * the pathname and the directory hasn't been removed. */ MSDOSFS_DPRINTF(("%s(): refcnt %ld, slotcount %d, slotoffset %d\n", __func__, dp->de_refcnt, slotcount, slotoffset)); /* * Fixup the slot description to point to the place where * we might put the new DOS direntry (putting the Win95 * long name entries before that) */ if (!slotcount) { slotcount = 1; slotoffset = diroff; } if (wincnt > slotcount) { slotoffset += sizeof(struct direntry) * (wincnt - slotcount); } /* * Return an indication of where the new directory * entry should be put. */ dp->de_fndoffset = slotoffset; dp->de_fndcnt = wincnt - 1; /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * * NB - if the directory is unlocked, then this * information cannot be used. */ return 0; } /* * Create a regular file. On entry the directory to contain the file being * created is locked. We must release before we return. 
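One mapping worth calling out before the code: FAT has no Unix permission bits, so the only mode information that survives is writability. As msdosfs_mkfile() below does, a missing owner write bit becomes ATTR_READONLY, and ATTR_ARCHIVE is always set on new files:

	ndirent.de_Attributes = (st->st_mode & S_IWUSR) ?
	    ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY;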
*/ struct denode * msdosfs_mkfile(const char *path, struct denode *pdep, fsnode *node) { struct componentname cn; struct denode ndirent; struct denode *dep; int error; struct stat *st = &node->inode->st; cn.cn_nameptr = node->name; cn.cn_namelen = strlen(node->name); MSDOSFS_DPRINTF(("%s(name %s, mode 0%o size %zu)\n", __func__, node->name, st->st_mode, (size_t)st->st_size)); /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad; } /* * Create a directory entry for the file, then call createde() to * have it installed. NOTE: DOS files are always executable. We * use the absence of the owner write bit to make the file * readonly. */ memset(&ndirent, 0, sizeof(ndirent)); if ((error = uniqdosname(pdep, &cn, ndirent.de_Name)) != 0) goto bad; ndirent.de_Attributes = (st->st_mode & S_IWUSR) ? ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY; ndirent.de_StartCluster = 0; ndirent.de_FileSize = 0; ndirent.de_pmp = pdep->de_pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; msdosfs_times(&ndirent, &node->inode->st); if ((error = msdosfs_findslot(pdep, &cn)) != 0) goto bad; if ((error = createde(&ndirent, pdep, &dep, &cn)) != 0) goto bad; if ((error = msdosfs_wfile(path, dep, node)) != 0) goto bad; return dep; bad: errno = error; return NULL; } static int msdosfs_updatede(struct denode *dep) { struct buf *bp; struct direntry *dirp; int error; dep->de_flag &= ~DE_MODIFIED; error = readde(dep, &bp, &dirp); if (error) return error; DE_EXTERNALIZE(dirp, dep); error = bwrite(bp); return error; } /* * Write data to a file or directory. */ static int msdosfs_wfile(const char *path, struct denode *dep, fsnode *node) { int error, fd; size_t osize = dep->de_FileSize; struct stat *st = &node->inode->st; size_t nsize, offs; struct msdosfsmount *pmp = dep->de_pmp; struct buf *bp; char *dat; u_long cn = 0; error = 0; /* XXX: gcc/vax */ MSDOSFS_DPRINTF(("%s(diroff %lu, dirclust %lu, startcluster %lu)\n", __func__, dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster)); if (st->st_size == 0) return 0; /* Don't bother to try to write files larger than the fs limit */ if (st->st_size > MSDOSFS_FILESIZE_MAX) return EFBIG; nsize = st->st_size; MSDOSFS_DPRINTF(("%s(nsize=%zu, osize=%zu)\n", __func__, nsize, osize)); if (nsize > osize) { if ((error = deextend(dep, nsize, NULL)) != 0) return error; if ((error = msdosfs_updatede(dep)) != 0) return error; } if ((fd = open(path, O_RDONLY)) == -1) { error = errno; MSDOSFS_DPRINTF(("open %s: %s", path, strerror(error))); return error; } if ((dat = mmap(0, nsize, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0)) == MAP_FAILED) { error = errno; MSDOSFS_DPRINTF(("%s: mmap %s: %s", __func__, node->name, strerror(error))); close(fd); goto out; } close(fd); for (offs = 0; offs < nsize;) { int blsize, cpsize; daddr_t bn; u_long on = offs & pmp->pm_crbomask; if ((error = pcbmap(dep, cn++, &bn, NULL, &blsize)) != 0) { MSDOSFS_DPRINTF(("%s: pcbmap %lu", __func__, (unsigned long)bn)); goto out; } MSDOSFS_DPRINTF(("%s(cn=%lu, bn=%llu, blsize=%d)\n", __func__, cn, (unsigned long long)bn, blsize)); if ((error = bread(pmp->pm_devvp, bn, blsize, 0, &bp)) != 0) { MSDOSFS_DPRINTF(("bread %d\n", error)); goto out; } cpsize = MIN((nsize - offs), blsize - on); memcpy(bp->b_data + on, dat + offs, cpsize); bwrite(bp); offs += cpsize; } munmap(dat, nsize); return 0; out: munmap(dat, nsize); return 
static const struct {
	struct direntry dot;
	struct direntry dotdot;
} dosdirtemplate = {
	{	".          ",			/* the . entry */
		ATTR_DIRECTORY,			/* file attribute */
		0,				/* reserved */
		0, { 0, 0 }, { 0, 0 },		/* create time & date */
		{ 0, 0 },			/* access date */
		{ 0, 0 },			/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },		/* modify time & date */
		{ 0, 0 },			/* startcluster */
		{ 0, 0, 0, 0 }			/* filesize */
	},
	{	"..         ",			/* the .. entry */
		ATTR_DIRECTORY,			/* file attribute */
		0,				/* reserved */
		0, { 0, 0 }, { 0, 0 },		/* create time & date */
		{ 0, 0 },			/* access date */
		{ 0, 0 },			/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },		/* modify time & date */
		{ 0, 0 },			/* startcluster */
		{ 0, 0, 0, 0 }			/* filesize */
	}
};

struct denode *
msdosfs_mkdire(const char *path, struct denode *pdep, fsnode *node)
{
	struct denode ndirent;
	struct denode *dep;
	struct componentname cn;
	struct msdosfsmount *pmp = pdep->de_pmp;
	int error;
	u_long newcluster, pcl, bn;
	struct direntry *denp;
	struct buf *bp;

	cn.cn_nameptr = node->name;
	cn.cn_namelen = strlen(node->name);
	/*
	 * If this is the root directory and there is no space left we
	 * can't do anything.  This is because the root directory can not
	 * change size.
	 */
	if (pdep->de_StartCluster == MSDOSFSROOT &&
	    pdep->de_fndoffset >= pdep->de_FileSize) {
		error = ENOSPC;
		goto bad2;
	}

	/*
	 * Allocate a cluster to hold the about to be created directory.
	 */
	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
	if (error)
		goto bad2;

	memset(&ndirent, 0, sizeof(ndirent));
	ndirent.de_pmp = pmp;
	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
	msdosfs_times(&ndirent, &node->inode->st);

	/*
	 * Now fill the cluster with the "." and ".." entries. And write
	 * the cluster to disk.  This way it is there for the parent
	 * directory to be pointing at if there were a crash.
	 */
	bn = cntobn(pmp, newcluster);
	MSDOSFS_DPRINTF(("%s(newcluster %lu, bn=%lu)\n", __func__,
	    newcluster, bn));
	/* always succeeds */
	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0, 0);
	memset(bp->b_data, 0, pmp->pm_bpcluster);
	memcpy(bp->b_data, &dosdirtemplate, sizeof dosdirtemplate);
	denp = (struct direntry *)bp->b_data;
	putushort(denp[0].deStartCluster, newcluster);
	putushort(denp[0].deCDate, ndirent.de_CDate);
	putushort(denp[0].deCTime, ndirent.de_CTime);
	denp[0].deCHundredth = ndirent.de_CHun;
	putushort(denp[0].deADate, ndirent.de_ADate);
	putushort(denp[0].deMDate, ndirent.de_MDate);
	putushort(denp[0].deMTime, ndirent.de_MTime);
	pcl = pdep->de_StartCluster;
	MSDOSFS_DPRINTF(("%s(pcl %lu, rootdirblk=%lu)\n", __func__, pcl,
	    pmp->pm_rootdirblk));
	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
		pcl = 0;
	putushort(denp[1].deStartCluster, pcl);
	putushort(denp[1].deCDate, ndirent.de_CDate);
	putushort(denp[1].deCTime, ndirent.de_CTime);
	denp[1].deCHundredth = ndirent.de_CHun;
	putushort(denp[1].deADate, ndirent.de_ADate);
	putushort(denp[1].deMDate, ndirent.de_MDate);
	putushort(denp[1].deMTime, ndirent.de_MTime);
	if (FAT32(pmp)) {
		putushort(denp[0].deHighClust, newcluster >> 16);
		putushort(denp[1].deHighClust,
		    pdep->de_StartCluster >> 16);
	} else {
		putushort(denp[0].deHighClust, 0);
		putushort(denp[1].deHighClust, 0);
	}
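
	/*
	 * Editorial note, not in the original source: FAT32 cluster
	 * numbers are wider than 16 bits, but a direntry stores them
	 * split across two 16-bit fields.  For example, cluster
	 * 0x00012345 is stored as deStartCluster = 0x2345 and
	 * deHighClust = 0x0001; putushort() writes only the low 16 bits
	 * of its argument, hence the explicit ">> 16" for the high half.
	 */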

	if ((error = bwrite(bp)) != 0)
		goto bad;

	/*
	 * Now build up a directory entry pointing to the newly allocated
	 * cluster.  This will be written to an empty slot in the parent
	 * directory.
	 */
	if ((error = uniqdosname(pdep, &cn, ndirent.de_Name)) != 0)
		goto bad;

	ndirent.de_Attributes = ATTR_DIRECTORY;
	ndirent.de_StartCluster = newcluster;
	ndirent.de_FileSize = 0;
	ndirent.de_pmp = pdep->de_pmp;

	if ((error = msdosfs_findslot(pdep, &cn)) != 0)
		goto bad;
	if ((error = createde(&ndirent, pdep, &dep, &cn)) != 0)
		goto bad;
	if ((error = msdosfs_updatede(dep)) != 0)
		goto bad;

	return dep;

bad:
	clusterfree(pmp, newcluster, NULL);
bad2:
	errno = error;
	return NULL;
}
Index: projects/clang1000-import
===================================================================
--- projects/clang1000-import	(revision 357178)
+++ projects/clang1000-import	(revision 357179)

Property changes on: projects/clang1000-import
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r357119-357178