Index: projects/ifnet/etc/rc.d/pflog
===================================================================
--- projects/ifnet/etc/rc.d/pflog	(revision 281172)
+++ projects/ifnet/etc/rc.d/pflog	(revision 281173)
@@ -1,105 +1,105 @@
 #!/bin/sh
 #
 # $FreeBSD$
 #
 
 # PROVIDE: pflog
 # REQUIRE: FILESYSTEMS netif
 # KEYWORD: nojail
 
 . /etc/rc.subr
 
 name="pflog"
 rcvar="pflog_enable"
 command="/sbin/pflogd"
 pidfile="/var/run/pflogd.pid"
 start_precmd="pflog_prestart"
 stop_postcmd="pflog_poststop"
 extra_commands="reload resync"
 
 # for backward compatibility
 resync_cmd="pflog_resync"
 
 pflog_prestart()
 {
 	load_kld pflog || return 1
 
 	# create pflog_dev interface if needed
 	if ! ifconfig $pflog_dev > /dev/null 2>&1; then
 		if ! ifconfig $pflog_dev create; then
 			warn "could not create $pflog_dev."
 			return 1
 		fi
 	fi
 
 	# set pflog_dev interface to up state
 	if ! ifconfig $pflog_dev up; then
 		warn "could not bring up $pflog_dev."
 		return 1
 	fi
 
-	# -p flag requires striping pidfile's leading /var/run and trailing .pid
+	# -p flag requires stripping pidfile's leading /var/run and trailing .pid
 	pidfile=$(echo $pidfile | sed -e 's|/var/run/||' -e 's|.pid$||')
 
 	# prepare the command line for pflogd
 	rc_flags="-p $pidfile -f $pflog_logfile -i $pflog_dev $rc_flags"
 
 	# report we're ready to run pflogd
 	return 0
 }
 
 pflog_poststop()
 {
 	if ! ifconfig $pflog_dev down; then
 		warn "could not bring down $pflog_dev."
 		return 1
 	fi
 
 	if [ "$pflog_instances" ] && [ -n "$pflog_instances" ]; then
 		rm $pidfile
 	fi
 
 	return 0
 }
 
 # for backward compatibility
 pflog_resync()
 {
 	run_rc_command reload
 }
 
 load_rc_config $name
 
 # Check if spawning multiple pflogd and told what to spawn
 if [ -n "$2" ]; then
 	# Set required variables
 	eval pflog_dev=\$pflog_${2}_dev
 	eval pflog_logfile=\$pflog_${2}_logfile
 	eval pflog_flags=\$pflog_${2}_flags
 	# Check that required vars have non-zero length, warn if not.
 	if [ -z $pflog_dev ]; then
 		warn "pflog_dev not set"
 		continue
 	fi
 	if [ -z $pflog_logfile ]; then
 		warn "pflog_logfile not set"
 		continue
 	fi
 
 	# Provide a unique pidfile name for pflogd -p <pidfile> flag
 	pidfile="/var/run/pflogd.$2.pid"
 
 	# Override service name and execute command
 	name=$pflog_dev
 	run_rc_command "$1"
 # Check if spawning multiple pflogd and not told what to spawn
 elif [ "$pflog_instances" ] && [ -n "$pflog_instances" ]; then
 	# Interate through requested instances.
 	for i in $pflog_instances; do
 		/etc/rc.d/pflog $1 $i
 	done
 else
 	# Typical case, spawn single instance only.
 	pflog_dev=${pflog_dev:-"pflog0"}
 	run_rc_command "$1"
 fi
Index: projects/ifnet/etc
===================================================================
--- projects/ifnet/etc	(revision 281172)
+++ projects/ifnet/etc	(revision 281173)

Property changes on: projects/ifnet/etc
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/etc:r281144-281172
Index: projects/ifnet/lib/libc/sys/vfork.2
===================================================================
--- projects/ifnet/lib/libc/sys/vfork.2	(revision 281172)
+++ projects/ifnet/lib/libc/sys/vfork.2	(revision 281173)
@@ -1,129 +1,129 @@
 .\" Copyright (c) 1980, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 4. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)vfork.2	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd November 13, 2009
+.Dd April 6, 2015
 .Dt VFORK 2
 .Os
 .Sh NAME
 .Nm vfork
 .Nd create a new process without copying the address space
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In unistd.h
 .Ft pid_t
 .Fn vfork void
 .Sh DESCRIPTION
 The
 .Fn vfork
 system call
 can be used to create new processes without fully copying the address
 space of the old process, which is horrendously inefficient in a paged
 environment.
 It is useful when the purpose of
 .Xr fork 2
 would have been to create a new system context for an
 .Xr execve 2 .
 The
 .Fn vfork
 system call
 differs from
 .Xr fork 2
 in that the child borrows the parent's memory and thread of
 control until a call to
 .Xr execve 2
 or an exit (either by a call to
 .Xr _exit 2
 or abnormally).
 The parent process is suspended while the child is using its resources.
 .Pp
 The
 .Fn vfork
 system call
 returns 0 in the child's context and (later) the pid of the child in
 the parent's context.
 .Pp
 The
 .Fn vfork
 system call
 can normally be used just like
 .Xr fork 2 .
 It does not work, however, to return while running in the child's context
 from the procedure that called
 .Fn vfork
 since the eventual return from
 .Fn vfork
 would then return to a no longer existent stack frame.
 Be careful, also, to call
 .Xr _exit 2
 rather than
 .Xr exit 3
 if you cannot
 .Xr execve 2 ,
 since
 .Xr exit 3
 will flush and close standard I/O channels, and thereby mess up the
 parent processes standard I/O data structures.
 (Even with
 .Xr fork 2
 it is wrong to call
 .Xr exit 3
 since buffered data would then be flushed twice.)
 .Sh RETURN VALUES
 Same as for
 .Xr fork 2 .
 .Sh SEE ALSO
 .Xr _exit 2 ,
 .Xr execve 2 ,
 .Xr fork 2 ,
 .Xr rfork 2 ,
 .Xr sigaction 2 ,
 .Xr wait 2 ,
 .Xr exit 3
 .Sh HISTORY
 The
 .Fn vfork
 system call appeared in
-.Bx 2.9 .
+.Bx 3 .
 .Sh BUGS
 To avoid a possible deadlock situation,
 processes that are children in the middle
 of a
 .Fn vfork
 are never sent
 .Dv SIGTTOU
 or
 .Dv SIGTTIN
 signals; rather,
 output or
 .Xr ioctl 2
 calls
 are allowed
 and input attempts result in an end-of-file indication.
Index: projects/ifnet/lib/libc
===================================================================
--- projects/ifnet/lib/libc	(revision 281172)
+++ projects/ifnet/lib/libc	(revision 281173)

Property changes on: projects/ifnet/lib/libc
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/lib/libc:r281144-281172
Index: projects/ifnet/sys/arm/include/metadata.h
===================================================================
--- projects/ifnet/sys/arm/include/metadata.h	(revision 281172)
+++ projects/ifnet/sys/arm/include/metadata.h	(revision 281173)
@@ -1,35 +1,42 @@
 /*-
  * Copyright (c) 2003 Peter Wemm <peter@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_METADATA_H_
 #define	_MACHINE_METADATA_H_
 
 #define	MODINFOMD_BOOTINFO	0x1001
 #define	MODINFOMD_DTBP		0x1002
+#define	MODINFOMD_EFI_MAP	0x1003
+
+struct efi_map_header {
+	uint64_t	memory_size;
+	uint64_t	descriptor_size;
+	uint32_t	descriptor_version;
+};
 
 #endif /* !_MACHINE_METADATA_H_ */
Index: projects/ifnet/sys/arm64/include/atomic.h
===================================================================
--- projects/ifnet/sys/arm64/include/atomic.h	(revision 281172)
+++ projects/ifnet/sys/arm64/include/atomic.h	(revision 281173)
@@ -1,574 +1,730 @@
 /*-
  * Copyright (c) 2013 Andrew Turner <andrew@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_MACHINE_ATOMIC_H_
 #define	_MACHINE_ATOMIC_H_
 
 #define	isb()  __asm __volatile("isb" : : : "memory")
 #define	dsb()  __asm __volatile("dsb sy" : : : "memory")
 #define	dmb()  __asm __volatile("dmb sy" : : : "memory")
 
 #define	mb()   dmb()
 #define	wmb()  dmb()
 #define	rmb()  dmb()
 
 static __inline void
 atomic_add_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%w0, [%2]      \n"
 	    "   add	%w0, %w0, %w3  \n"
 	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 static __inline void
 atomic_clear_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%w0, [%2]      \n"
 	    "   bic	%w0, %w0, %w3  \n"
 	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 static __inline int
 atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: mov	%w1, #1        \n"
 	    "   ldxr	%w0, [%2]      \n"
 	    "   cmp	%w0, %w3       \n"
 	    "   b.ne	2f             \n"
 	    "   stxr	%w1, %w4, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    "2:"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
 	    : : "cc"
 	);
 
 	return (!res);
 }
 
 static __inline uint32_t
 atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp, ret;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%w4, [%2]      \n"
 	    "   add	%w0, %w4, %w3  \n"
 	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc"
 	);
 
 	return (ret);
 }
 
 static __inline uint32_t
 atomic_readandclear_32(volatile uint32_t *p)
 {
 	uint32_t tmp, ret;
 	int res;
 
 	__asm __volatile(
 	    "   mov	%w0, #0        \n"
 	    "1: ldxr	%w3, [%2]      \n"
 	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc"
 	);
 
 	return (ret);
 }
 
 static __inline void
 atomic_set_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%w0, [%2]      \n"
 	    "   orr	%w0, %w0, %w3  \n"
 	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 static __inline void
 atomic_subtract_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%w0, [%2]      \n"
 	    "   sub	%w0, %w0, %w3  \n"
 	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 #define	atomic_add_int		atomic_add_32
 #define	atomic_clear_int	atomic_clear_32
 #define	atomic_cmpset_int	atomic_cmpset_32
 #define	atomic_fetchadd_int	atomic_fetchadd_32
 #define	atomic_readandclear_int	atomic_readandclear_32
 #define	atomic_set_int		atomic_set_32
 #define	atomic_subtract_int	atomic_subtract_32
 
-
 static __inline void
 atomic_add_acq_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%w0, [%2]      \n"
 	    "   add	%w0, %w0, %w3  \n"
-	    "   stlxr	%w1, %w0, [%2] \n"
+	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    "2:"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
 static __inline void
 atomic_clear_acq_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%w0, [%2]      \n"
 	    "   bic	%w0, %w0, %w3  \n"
-	    "   stlxr	%w1, %w0, [%2] \n"
+	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
 static __inline int
 atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: mov	%w1, #1        \n"
 	    "   ldaxr	%w0, [%2]      \n"
 	    "   cmp	%w0, %w3       \n"
 	    "   b.ne	2f             \n"
-	    "   stlxr	%w1, %w4, [%2] \n"
+	    "   stxr	%w1, %w4, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    "2:"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
 	    : : "cc", "memory"
 	);
 
 	return (!res);
 }
 
 static __inline uint32_t
 atomic_load_acq_32(volatile uint32_t *p)
 {
 	uint32_t ret;
 
-	ret = *p;
-	dmb();
+	__asm __volatile(
+	    "ldar	%w0, [%1] \n"
+	    : "=&r" (ret) : "r" (p) : "memory");
 
 	return (ret);
 }
 
 static __inline void
 atomic_set_acq_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%w0, [%2]      \n"
 	    "   orr	%w0, %w0, %w3  \n"
-	    "   stlxr	%w1, %w0, [%2] \n"
+	    "   stxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
 static __inline void
 atomic_subtract_acq_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%w0, [%2]      \n"
 	    "   sub	%w0, %w0, %w3  \n"
+	    "   stxr	%w1, %w0, [%2] \n"
+            "   cbnz	%w1, 1b        \n"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
+
+#define	atomic_add_acq_int	atomic_add_acq_32
+#define	atomic_clear_acq_int	atomic_clear_acq_32
+#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
+#define	atomic_load_acq_int	atomic_load_acq_32
+#define	atomic_set_acq_int	atomic_set_acq_32
+#define	atomic_subtract_acq_int	atomic_subtract_acq_32
+
+/* Release variants: the store side uses store-release (stlxr/stlr). */
+
+static __inline void
+atomic_add_rel_32(volatile uint32_t *p, uint32_t val)
+{
+	uint32_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: ldxr	%w0, [%2]      \n"
+	    "   add	%w0, %w0, %w3  \n"
 	    "   stlxr	%w1, %w0, [%2] \n"
             "   cbnz	%w1, 1b        \n"
+	    "2:"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
 static __inline void
+atomic_clear_rel_32(volatile uint32_t *p, uint32_t val)
+{
+	uint32_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: ldxr	%w0, [%2]      \n"
+	    "   bic	%w0, %w0, %w3  \n"
+	    "   stlxr	%w1, %w0, [%2] \n"
+            "   cbnz	%w1, 1b        \n"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
+
+static __inline int
+atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
+{
+	uint32_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: mov	%w1, #1        \n"
+	    "   ldxr	%w0, [%2]      \n"
+	    "   cmp	%w0, %w3       \n"
+	    "   b.ne	2f             \n"
+	    "   stlxr	%w1, %w4, [%2] \n"
+            "   cbnz	%w1, 1b        \n"
+	    "2:"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
+	    : : "cc", "memory"
+	);
+
+	return (!res);
+}
+
+static __inline void
+atomic_set_rel_32(volatile uint32_t *p, uint32_t val)
+{
+	uint32_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: ldxr	%w0, [%2]      \n"
+	    "   orr	%w0, %w0, %w3  \n"
+	    "   stlxr	%w1, %w0, [%2] \n"
+            "   cbnz	%w1, 1b        \n"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
+
+static __inline void
 atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
 {
 
-	dmb();
-	*p = val;
+	__asm __volatile(
+	    "stlr	%w0, [%1] \n"
+	    : : "r" (val), "r" (p) : "memory");
 }
 
-#define	atomic_add_acq_int	atomic_add_acq_32
-#define	atomic_clear_acq_int	atomic_add_acq_32
-#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
-#define	atomic_load_acq_int	atomic_load_acq_32
-#define	atomic_set_acq_int	atomic_set_acq_32
-#define	atomic_subtract_acq_int	atomic_subtract_acq_32
+static __inline void
+atomic_subtract_rel_32(volatile uint32_t *p, uint32_t val)
+{
+	uint32_t tmp;
+	int res;
 
-/* The atomic functions currently are both acq and rel, we should fix this. */
-#define	atomic_add_rel_32	atomic_add_acq_32
-#define	atomic_clear_rel_32	atomic_add_acq_32
-#define	atomic_cmpset_rel_32	atomic_cmpset_acq_32
-#define	atomic_set_rel_32	atomic_set_acq_32
-#define	atomic_subtract_rel_32	atomic_subtract_acq_32
+	__asm __volatile(
+	    "1: ldxr	%w0, [%2]      \n"
+	    "   sub	%w0, %w0, %w3  \n"
+	    "   stlxr	%w1, %w0, [%2] \n"
+            "   cbnz	%w1, 1b        \n"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
 
 #define	atomic_add_rel_int	atomic_add_rel_32
 #define	atomic_clear_rel_int	atomic_add_rel_32
 #define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
 #define	atomic_set_rel_int	atomic_set_rel_32
 #define	atomic_subtract_rel_int	atomic_subtract_rel_32
 #define	atomic_store_rel_int	atomic_store_rel_32
 
 
 static __inline void
 atomic_add_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%0, [%2]      \n"
 	    "   add	%0, %0, %3    \n"
 	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 static __inline void
 atomic_clear_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%0, [%2]      \n"
 	    "   bic	%0, %0, %3    \n"
 	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 static __inline int
 atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: mov	%w1, #1       \n"
 	    "   ldxr	%0, [%2]      \n"
 	    "   cmp	%0, %3        \n"
 	    "   b.ne	2f            \n"
 	    "   stxr	%w1, %4, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    "2:"
 	    : "=&r" (tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
 	    : : "cc", "memory"
 	);
 
 	return (!res);
 }
 
 static __inline uint64_t
 atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp, ret;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%4, [%2]      \n"
 	    "   add	%0, %4, %3    \n"
 	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc"
 	);
 
 	return (ret);
 }
 
 static __inline uint64_t
 atomic_readandclear_64(volatile uint64_t *p)
 {
 	uint64_t tmp, ret;
 	int res;
 
 	__asm __volatile(
 	    "   mov	%0, #0        \n"
 	    "1: ldxr	%3, [%2]      \n"
 	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc"
 	);
 
 	return (ret);
 }
 
 static __inline void
 atomic_set_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%0, [%2]      \n"
 	    "   orr	%0, %0, %3    \n"
 	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 static __inline void
 atomic_subtract_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%0, [%2]      \n"
 	    "   sub	%0, %0, %3    \n"
 	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
 	);
 }
 
 static __inline uint64_t
 atomic_swap_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t old;
 	int res;
 
 	__asm __volatile(
 	    "1: ldxr	%0, [%2]      \n"
 	    "   stxr	%w1, %3, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(old), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 
 	return (old);
 }
 
 #define	atomic_add_long			atomic_add_64
 #define	atomic_clear_long		atomic_clear_64
 #define	atomic_cmpset_long		atomic_cmpset_64
 #define	atomic_fetchadd_long		atomic_fetchadd_64
 #define	atomic_readandclear_long	atomic_readandclear_64
 #define	atomic_set_long			atomic_set_64
 #define	atomic_subtract_long		atomic_subtract_64
 
 #define	atomic_add_ptr			atomic_add_64
 #define	atomic_clear_ptr		atomic_clear_64
 #define	atomic_cmpset_ptr		atomic_cmpset_64
 #define	atomic_fetchadd_ptr		atomic_fetchadd_64
 #define	atomic_readandclear_ptr		atomic_readandclear_64
 #define	atomic_set_ptr			atomic_set_64
 #define	atomic_subtract_ptr		atomic_subtract_64
 
 static __inline void
 atomic_add_acq_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%0, [%2]      \n"
 	    "   add	%0, %0, %3    \n"
-	    "   stlxr	%w1, %0, [%2] \n"
+	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    "2:"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
 static __inline void
 atomic_clear_acq_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%0, [%2]      \n"
 	    "   bic	%0, %0, %3    \n"
-	    "   stlxr	%w1, %0, [%2] \n"
+	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
 static __inline int
 atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: mov	%w1, #1       \n"
 	    "   ldaxr	%0, [%2]      \n"
 	    "   cmp	%0, %3        \n"
 	    "   b.ne	2f            \n"
-	    "   stlxr	%w1, %4, [%2] \n"
+	    "   stxr	%w1, %4, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    "2:"
 	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval)
 	    : : "cc", "memory"
 	);
 
 	return (!res);
 }
 
 static __inline uint64_t
 atomic_load_acq_64(volatile uint64_t *p)
 {
 	uint64_t ret;
 
-	ret = *p;
-	dmb();
+	__asm __volatile(
+	    "ldar	%0, [%1] \n"
+	    : "=&r" (ret) : "r" (p) : "memory");
 
 	return (ret);
 }
 
 static __inline void
 atomic_set_acq_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%0, [%2]      \n"
 	    "   orr	%0, %0, %3    \n"
-	    "   stlxr	%w1, %0, [%2] \n"
+	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
 static __inline void
 atomic_subtract_acq_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	int res;
 
 	__asm __volatile(
 	    "1: ldaxr	%0, [%2]      \n"
 	    "   sub	%0, %0, %3    \n"
-	    "   stlxr	%w1, %0, [%2] \n"
+	    "   stxr	%w1, %0, [%2] \n"
             "   cbnz	%w1, 1b       \n"
 	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
 	);
 }
 
-static __inline void
-atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
-{
-
-	dmb();
-	*p = val;
-}
-
 #define	atomic_add_acq_long		atomic_add_acq_64
 #define	atomic_clear_acq_long		atomic_add_acq_64
 #define	atomic_cmpset_acq_long		atomic_cmpset_acq_64
 #define	atomic_load_acq_long		atomic_load_acq_64
 #define	atomic_set_acq_long		atomic_set_acq_64
 #define	atomic_subtract_acq_long	atomic_subtract_acq_64
 
 #define	atomic_add_acq_ptr		atomic_add_acq_64
 #define	atomic_clear_acq_ptr		atomic_add_acq_64
 #define	atomic_cmpset_acq_ptr		atomic_cmpset_acq_64
 #define	atomic_load_acq_ptr		atomic_load_acq_64
 #define	atomic_set_acq_ptr		atomic_set_acq_64
 #define	atomic_subtract_acq_ptr		atomic_subtract_acq_64
 
 /*
  * TODO: The atomic functions currently are both acq and rel, we should fix
  * this.
  */
-#define	atomic_add_rel_64		atomic_add_acq_64
-#define	atomic_clear_rel_64		atomic_add_acq_64
-#define	atomic_cmpset_rel_64		atomic_cmpset_acq_64
-#define	atomic_set_rel_64		atomic_set_acq_64
-#define	atomic_subtract_rel_64		atomic_subtract_acq_64
+static __inline void
+atomic_add_rel_64(volatile uint64_t *p, uint64_t val)
+{
+	uint64_t tmp;
+	int res;
 
+	__asm __volatile(
+	    "1: ldxr	%0, [%2]      \n"
+	    "   add	%0, %0, %3    \n"
+	    "   stlxr	%w1, %0, [%2] \n"
+            "   cbnz	%w1, 1b       \n"
+	    "2:"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
+
+static __inline void
+atomic_clear_rel_64(volatile uint64_t *p, uint64_t val)
+{
+	uint64_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: ldxr	%0, [%2]      \n"
+	    "   bic	%0, %0, %3    \n"
+	    "   stlxr	%w1, %0, [%2] \n"
+            "   cbnz	%w1, 1b       \n"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
+
+static __inline int
+atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
+{
+	uint64_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: mov	%w1, #1       \n"
+	    "   ldxr	%0, [%2]      \n"
+	    "   cmp	%0, %3        \n"
+	    "   b.ne	2f            \n"
+	    "   stlxr	%w1, %4, [%2] \n"
+            "   cbnz	%w1, 1b       \n"
+	    "2:"
+	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval)
+	    : : "cc", "memory"
+	);
+
+	return (!res);
+}
+
+static __inline void
+atomic_set_rel_64(volatile uint64_t *p, uint64_t val)
+{
+	uint64_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: ldxr	%0, [%2]      \n"
+	    "   orr	%0, %0, %3    \n"
+	    "   stlxr	%w1, %0, [%2] \n"
+            "   cbnz	%w1, 1b       \n"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
+
+static __inline void
+atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
+{
+
+	__asm __volatile(
+	    "stlr	%0, [%1] \n"
+	    : : "r" (val), "r" (p) : "memory");
+}
+
+static __inline void
+atomic_subtract_rel_64(volatile uint64_t *p, uint64_t val)
+{
+	uint64_t tmp;
+	int res;
+
+	__asm __volatile(
+	    "1: ldxr	%0, [%2]      \n"
+	    "   sub	%0, %0, %3    \n"
+	    "   stlxr	%w1, %0, [%2] \n"
+            "   cbnz	%w1, 1b       \n"
+	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
+	);
+}
+
 #define	atomic_add_rel_long		atomic_add_rel_64
-#define	atomic_clear_rel_long		atomic_add_rel_64
+#define	atomic_clear_rel_long		atomic_clear_rel_64
 #define	atomic_cmpset_rel_long		atomic_cmpset_rel_64
 #define	atomic_set_rel_long		atomic_set_rel_64
 #define	atomic_subtract_rel_long	atomic_subtract_rel_64
 #define	atomic_store_rel_long		atomic_store_rel_64
 
 #define	atomic_add_rel_ptr		atomic_add_rel_64
-#define	atomic_clear_rel_ptr		atomic_add_rel_64
+#define	atomic_clear_rel_ptr		atomic_clear_rel_64
 #define	atomic_cmpset_rel_ptr		atomic_cmpset_rel_64
 #define	atomic_set_rel_ptr		atomic_set_rel_64
 #define	atomic_subtract_rel_ptr		atomic_subtract_rel_64
 #define	atomic_store_rel_ptr		atomic_store_rel_64
 
 #endif /* _MACHINE_ATOMIC_H_ */
 
Index: projects/ifnet/sys/boot/efi/Makefile
===================================================================
--- projects/ifnet/sys/boot/efi/Makefile	(revision 281172)
+++ projects/ifnet/sys/boot/efi/Makefile	(revision 281173)
@@ -1,17 +1,17 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 
 SUBDIR=		libefi
 
 .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm"
 .if ${MK_FDT} != "no"
 SUBDIR+=	fdt
 .endif
 .endif
 
-.if ${MACHINE_CPUARCH} == "amd64"
+.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "arm"
 SUBDIR+=	loader boot1
 .endif
 
 .include <bsd.subdir.mk>
Index: projects/ifnet/sys/boot/efi/boot1/fat.tmpl.bz2.uu
===================================================================
--- projects/ifnet/sys/boot/efi/boot1/fat.tmpl.bz2.uu	(revision 281172)
+++ projects/ifnet/sys/boot/efi/boot1/fat.tmpl.bz2.uu	(nonexistent)
@@ -1,20 +0,0 @@
-FAT template boot filesystem created by generate-fat.sh
-DO NOT EDIT
-$FreeBSD$
-begin 644 fat.tmpl.bz2
-M0EIH.3%!629362AK*D(`&I+____[ZZKJZ_^N_ZO^Z_Z_OJ[L`4`!7I0$#&$"
-M0$!$3&(<P`(;J*C:0E0E#30&AH`T````9#0```9````#)ZF0:,-3U/409,`)
-M@`"8`C3",````$R:8F@P`C`````"24U,D>I-DTU,)ZAZ0VA-!M0T'J`>H#"9
-M'I#0-H&HQI&0&3&FH>H>*`JHHU3V]1%/4/2``T#0`!H``#0`````#1H,@``6
-M'1&G'&@?$6[T#A)?X8$A160"20BO#")0J4TB1*4GXF$B4I,&>43+=_?K=#3*
-M6]<E0HE`UBF?(J%8BRF#?8OQ2'D)`)(EL2;F4.'R>R"ZNKJZI,9*68E8*E2Q
-M4J5*E3'(1830A"$(12A-"<(0A#]VD)H0A"$,>I0FA"$(0I\>P^=F5:M6K5JU
-M:DI3:64UN;[7%5B]Y-^\]@_K@B:N\/,5F%&H<\G#IXQXAEFC&D?![6%0'6MR
-MX1@@%FC"FD`M7,/SXFNG:2`'-0<-C$8^+$N.7M1B,^6)9,DV9,0A\OL<:C"L
-ML1V&,<\9YRB>XV#BG")'6NKRK^("UF2XO?_L!#29">MGDF$R3).!PX&%E,4C
-M''=(FL1.`_3?CN@-IB2PI3!FF\<8X.X@D,>CA90I)#M$XRPNDFJELL<3=1?8
-M2B7\5Z64,!7Z;EEBW-MXN-4IJ@W$462]-*\YCR,-B,5[W?=3&L/U>SX,WV#\
-M\B`:I"'0Z)5"$1B.E)(K[5I4RS`%R$>Y\D0NR*,;<9CZ:^V3P(I?D<D#!UC)
-D^M-HEE3SAN-8O0FQ$(`$(DF`?ZQ]'U2F_XNY(IPH2!0UE2$`
-`
-end

Property changes on: projects/ifnet/sys/boot/efi/boot1/fat.tmpl.bz2.uu
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/boot1/Makefile
===================================================================
--- projects/ifnet/sys/boot/efi/boot1/Makefile	(revision 281172)
+++ projects/ifnet/sys/boot/efi/boot1/Makefile	(revision 281173)
@@ -1,90 +1,100 @@
 # $FreeBSD$
 
 MAN=
 
 .include <bsd.own.mk>
 
 # In-tree GCC does not support __attribute__((ms_abi)).
 .if ${COMPILER_TYPE} != "gcc"
 
 MK_SSP=		no
 
 PROG=		loader.sym
 INTERNALPROG=
 
 # architecture-specific loader code
 SRCS=	boot1.c reloc.c start.S
 
 CFLAGS+=	-fPIC
 CFLAGS+=	-I.
 CFLAGS+=	-I${.CURDIR}/../include
 CFLAGS+=	-I${.CURDIR}/../include/${MACHINE_CPUARCH}
 CFLAGS+=	-I${.CURDIR}/../../../contrib/dev/acpica/include
 CFLAGS+=	-I${.CURDIR}/../../..
 
 # Always add MI sources and REGULAR efi loader bits
 .PATH:		${.CURDIR}/../loader/arch/${MACHINE_CPUARCH} ${.CURDIR}/../../common
 CFLAGS+=	-I${.CURDIR}/../../common
 
 FILES=	boot1.efi boot1.efifat
 FILESMODE_boot1.efi=	${BINMODE}
 
 LDSCRIPT=	${.CURDIR}/../loader/arch/${MACHINE_CPUARCH}/ldscript.${MACHINE_CPUARCH}
 LDFLAGS=	-Wl,-T${LDSCRIPT} -Wl,-Bsymbolic -shared
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
 LDFLAGS+=	-Wl,-znocombreloc
 .endif
 
+.if ${MACHINE_CPUARCH} == "arm"
+#
+# Add libstand for the __aeabi_* functions used by the compiler
+#
+DPADD+=		${LIBSTAND}
+LDADD+=		-lstand
+.endif
+
 ${PROG}:	${LDSCRIPT}
 
 OBJCOPY?=	objcopy
 OBJDUMP?=	objdump
 
 .if ${MACHINE_CPUARCH} == "amd64"
 EFI_TARGET=	efi-app-x86_64
 .elif ${MACHINE_CPUARCH} == "i386"
 EFI_TARGET=	efi-app-ia32
+.else
+EFI_TARGET=	binary
 .endif
 
 boot1.efi: loader.sym
 	if [ `${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*' | wc -l` != 0 ]; then \
 		${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*'; \
 		exit 1; \
 	fi
-	${OBJCOPY} -j .text -j .sdata -j .data \
+	${OBJCOPY} -j .peheader -j .text -j .sdata -j .data \
 		-j .dynamic -j .dynsym -j .rel.dyn \
 		-j .rela.dyn -j .reloc -j .eh_frame -j set_Xcommand_set \
 		--output-target=${EFI_TARGET} ${.ALLSRC} ${.TARGET}
 
 boot1.o: ${.CURDIR}/../../common/ufsread.c
 
 # The following inserts out objects into a template FAT file system
 # created by generate-fat.sh
 
 .include "${.CURDIR}/Makefile.fat"
 
 boot1.efifat: boot1.efi
 	echo ${.OBJDIR}
-	uudecode ${.CURDIR}/fat.tmpl.bz2.uu
-	mv fat.tmpl.bz2 ${.TARGET}.bz2
+	uudecode ${.CURDIR}/fat-${MACHINE_CPUARCH}.tmpl.bz2.uu
+	mv fat-${MACHINE_CPUARCH}.tmpl.bz2 ${.TARGET}.bz2
 	bzip2 -f -d ${.TARGET}.bz2
 	dd if=boot1.efi of=${.TARGET} seek=${BOOT1_OFFSET} conv=notrunc
 
 CLEANFILES= boot1.efi boot1.efifat
 
 .endif # ${COMPILER_TYPE} != "gcc"
 
 .include <bsd.prog.mk>
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
 beforedepend ${OBJS}: machine x86
 
 CLEANFILES+=   machine x86
 
 machine:
 	ln -sf ${.CURDIR}/../../../amd64/include machine
 
 x86:
 	ln -sf ${.CURDIR}/../../../x86/include x86
 .endif
Index: projects/ifnet/sys/boot/efi/boot1/boot1.c
===================================================================
--- projects/ifnet/sys/boot/efi/boot1/boot1.c	(revision 281172)
+++ projects/ifnet/sys/boot/efi/boot1/boot1.c	(revision 281173)
@@ -1,573 +1,573 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  * Copyright (c) 2001 Robert Drehmel
  * All rights reserved.
  * Copyright (c) 2014 Nathan Whitehorn
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/dirent.h>
 #include <machine/elf.h>
 #include <machine/stdarg.h>
 
 #include <efi.h>
 #include <eficonsctl.h>
 
 #define _PATH_LOADER	"/boot/loader.efi"
 #define _PATH_KERNEL	"/boot/kernel/kernel"
 
 #define BSIZEMAX	16384
 
 typedef int putc_func_t(char c, void *arg);
 
 struct sp_data {
 	char	*sp_buf;
 	u_int	sp_len;
 	u_int	sp_size;
 };
 
 static const char digits[] = "0123456789abcdef";
 
 static void panic(const char *fmt, ...) __dead2;
 static int printf(const char *fmt, ...);
 static int putchar(char c, void *arg);
 static int vprintf(const char *fmt, va_list ap);
 static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap);
 
 static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap);
 static int __putc(char c, void *arg);
 static int __puts(const char *s, putc_func_t *putc, void *arg);
 static int __sputc(char c, void *arg);
 static char *__uitoa(char *buf, u_int val, int base);
 static char *__ultoa(char *buf, u_long val, int base);
 
 static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet);
 static void load(const char *fname);
 
-EFI_SYSTEM_TABLE *systab;
-EFI_HANDLE *image;
+static EFI_SYSTEM_TABLE *systab;
+static EFI_HANDLE *image;
 
 static void
 bcopy(const void *src, void *dst, size_t len)
 {
 	const char *s = src;
 	char *d = dst;
 
 	while (len-- != 0)
 		*d++ = *s++;
 }
 
 static void
 memcpy(void *dst, const void *src, size_t len)
 {
 	bcopy(src, dst, len);
 }
 
 static void
 bzero(void *b, size_t len)
 {
 	char *p = b;
 
 	while (len-- != 0)
 		*p++ = 0;
 }
 
 static int
 strcmp(const char *s1, const char *s2)
 {
 	for (; *s1 == *s2 && *s1; s1++, s2++)
 		;
 	return ((u_char)*s1 - (u_char)*s2);
 }
 
 static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL;
 static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL;
 static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL;
 static EFI_GUID ConsoleControlGUID = EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
 
 static EFI_BLOCK_IO *bootdev;
 static EFI_DEVICE_PATH *bootdevpath;
 static EFI_HANDLE *bootdevhandle;
 
 EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab)
 {
 	EFI_HANDLE handles[128];
 	EFI_BLOCK_IO *blkio;
 	UINTN i, nparts = sizeof(handles), cols, rows, max_dim, best_mode;
 	EFI_STATUS status;
 	EFI_DEVICE_PATH *devpath;
 	EFI_BOOT_SERVICES *BS;
 	EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL;
 	SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL;
 	char *path = _PATH_LOADER;
 
 	systab = Xsystab;
 	image = Ximage;
 
 	BS = systab->BootServices;
 	status = BS->LocateProtocol(&ConsoleControlGUID, NULL,
 	    (VOID **)&ConsoleControl);
 	if (status == EFI_SUCCESS)
 		(void)ConsoleControl->SetMode(ConsoleControl,
 		    EfiConsoleControlScreenText);
 	/*
 	 * Reset the console and find the best text mode.
 	 */
 	conout = systab->ConOut;
 	conout->Reset(conout, TRUE);
 	max_dim = best_mode = 0;
 	for (i = 0; ; i++) {
 		status = conout->QueryMode(conout, i,
 		    &cols, &rows);
 		if (EFI_ERROR(status))
 			break;
 		if (cols * rows > max_dim) {
 			max_dim = cols * rows;
 			best_mode = i;
 		}
 	}
 	if (max_dim > 0)
 		conout->SetMode(conout, best_mode);
 	conout->EnableCursor(conout, TRUE);
 	conout->ClearScreen(conout);
 
 	printf("\n"
 	       ">> FreeBSD EFI boot block\n");
 	printf("   Loader path: %s\n", path);
 
 	status = systab->BootServices->LocateHandle(ByProtocol,
 	    &BlockIoProtocolGUID, NULL, &nparts, handles);
 	nparts /= sizeof(handles[0]);
 
 	for (i = 0; i < nparts; i++) {
 		status = systab->BootServices->HandleProtocol(handles[i],
 		    &DevicePathGUID, (void **)&devpath);
 		if (EFI_ERROR(status))
 			continue;
 
 		while (!IsDevicePathEnd(NextDevicePathNode(devpath)))
 			devpath = NextDevicePathNode(devpath);
 
 		status = systab->BootServices->HandleProtocol(handles[i],
 		    &BlockIoProtocolGUID, (void **)&blkio);
 		if (EFI_ERROR(status))
 			continue;
 
 		if (!blkio->Media->LogicalPartition)
 			continue;
 
 		if (domount(devpath, blkio, 1) >= 0)
 			break;
 	}
 
 	if (i == nparts)
 		panic("No bootable partition found");
 
 	bootdevhandle = handles[i];
 	load(path);
 
 	panic("Load failed");
 
 	return EFI_SUCCESS;
 }
 
 static int
 dskread(void *buf, u_int64_t lba, int nblk)
 {
 	EFI_STATUS status;
 	int size;
 
 	lba = lba / (bootdev->Media->BlockSize / DEV_BSIZE);
 	size = nblk * DEV_BSIZE;
 	status = bootdev->ReadBlocks(bootdev, bootdev->Media->MediaId, lba,
 	    size, buf);
 
 	if (EFI_ERROR(status))
 		return (-1);
 
 	return (0);
 }
 
 #include "ufsread.c"
 
 static ssize_t
 fsstat(ufs_ino_t inode)
 {
 #ifndef UFS2_ONLY
 	static struct ufs1_dinode dp1;
 	ufs1_daddr_t addr1;
 #endif
 #ifndef UFS1_ONLY
 	static struct ufs2_dinode dp2;
 #endif
 	static struct fs fs;
 	static ufs_ino_t inomap;
 	char *blkbuf;
 	void *indbuf;
 	size_t n, nb, size, off, vboff;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t addr2, vbaddr;
 	static ufs2_daddr_t blkmap, indmap;
 	u_int u;
 
 	blkbuf = dmadat->blkbuf;
 	indbuf = dmadat->indbuf;
 	if (!dsk_meta) {
 		inomap = 0;
 		for (n = 0; sblock_try[n] != -1; n++) {
 			if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE,
 			    SBLOCKSIZE / DEV_BSIZE))
 				return -1;
 			memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 			if ((
 #if defined(UFS1_ONLY)
 			    fs.fs_magic == FS_UFS1_MAGIC
 #elif defined(UFS2_ONLY)
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #else
 			    fs.fs_magic == FS_UFS1_MAGIC ||
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #endif
 			    ) &&
 			    fs.fs_bsize <= MAXBSIZE &&
 			    fs.fs_bsize >= sizeof(struct fs))
 				break;
 		}
 		if (sblock_try[n] == -1) {
 			printf("Not ufs\n");
 			return -1;
 		}
 		dsk_meta++;
 	} else
 		memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 	if (!inode)
 		return 0;
 	if (inomap != inode) {
 		n = IPERVBLK(&fs);
 		if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK))
 			return -1;
 		n = INO_TO_VBO(n, inode);
 #if defined(UFS1_ONLY)
 		memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 		    sizeof(struct ufs1_dinode));
 #elif defined(UFS2_ONLY)
 		memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 		    sizeof(struct ufs2_dinode));
 #else
 		if (fs.fs_magic == FS_UFS1_MAGIC)
 			memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 			    sizeof(struct ufs1_dinode));
 		else
 			memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 			    sizeof(struct ufs2_dinode));
 #endif
 		inomap = inode;
 		fs_off = 0;
 		blkmap = indmap = 0;
 	}
 	size = DIP(di_size);
 	n = size - fs_off;
 	return (n);
 }
 
 static struct dmadat __dmadat;
 
 static int
 domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet)
 {
 
 	dmadat = &__dmadat;
 	bootdev = blkio;
 	bootdevpath = device;
 	if (fsread(0, NULL, 0)) {
 		if (!quiet)
 			printf("domount: can't read superblock\n");
 		return (-1);
 	}
 	if (!quiet)
 		printf("Succesfully mounted UFS filesystem\n");
 	return (0);
 }
 
 static void
 load(const char *fname)
 {
 	ufs_ino_t ino;
 	EFI_STATUS status;
 	EFI_HANDLE loaderhandle;
 	EFI_LOADED_IMAGE *loaded_image;
 	void *buffer;
 	size_t bufsize;
 
 	if ((ino = lookup(fname)) == 0) {
 		printf("File %s not found\n", fname);
 		return;
 	}
 
 	bufsize = fsstat(ino);
 	status = systab->BootServices->AllocatePool(EfiLoaderData,
 	    bufsize, &buffer);
 	fsread(ino, buffer, bufsize);
 
 	/* XXX: For secure boot, we need our own loader here */
 	status = systab->BootServices->LoadImage(TRUE, image, bootdevpath,
 	    buffer, bufsize, &loaderhandle);
 	if (EFI_ERROR(status))
 		printf("LoadImage failed with error %d\n", status);
 
 	status = systab->BootServices->HandleProtocol(loaderhandle,
 	    &LoadedImageGUID, (VOID**)&loaded_image);
 	if (EFI_ERROR(status))
 		printf("HandleProtocol failed with error %d\n", status);
 
 	loaded_image->DeviceHandle = bootdevhandle;
 
 	status = systab->BootServices->StartImage(loaderhandle, NULL, NULL);
 	if (EFI_ERROR(status))
 		printf("StartImage failed with error %d\n", status);
 }
 
 static void
 panic(const char *fmt, ...)
 {
 	char buf[128];
 	va_list ap;
 
 	va_start(ap, fmt);
 	vsnprintf(buf, sizeof buf, fmt, ap);
 	printf("panic: %s\n", buf);
 	va_end(ap);
 
 	while (1) {}
 }
 
 static int
 printf(const char *fmt, ...)
 {
 	va_list ap;
 	int ret;
 
 	/* Don't annoy the user as we probe for partitions */
 	if (strcmp(fmt,"Not ufs\n") == 0)
 		return 0;
 
 	va_start(ap, fmt);
 	ret = vprintf(fmt, ap);
 	va_end(ap);
 	return (ret);
 }
 
 static int
 putchar(char c, void *arg)
 {
 	CHAR16 buf[2];
 
 	if (c == '\n') {
 		buf[0] = '\r';
 		buf[1] = 0;
 		systab->ConOut->OutputString(systab->ConOut, buf);
 	}
 	buf[0] = c;
 	buf[1] = 0;
 	systab->ConOut->OutputString(systab->ConOut, buf);
 	return (1);
 }
 
 static int
 vprintf(const char *fmt, va_list ap)
 {
 	int ret;
 
 	ret = __printf(fmt, putchar, 0, ap);
 	return (ret);
 }
 
 static int
 vsnprintf(char *str, size_t sz, const char *fmt, va_list ap)
 {
 	struct sp_data sp;
 	int ret;
 
 	sp.sp_buf = str;
 	sp.sp_len = 0;
 	sp.sp_size = sz;
 	ret = __printf(fmt, __sputc, &sp, ap);
 	return (ret);
 }
 
 static int
 __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap)
 {
 	char buf[(sizeof(long) * 8) + 1];
 	char *nbuf;
 	u_long ul;
 	u_int ui;
 	int lflag;
 	int sflag;
 	char *s;
 	int pad;
 	int ret;
 	int c;
 
 	nbuf = &buf[sizeof buf - 1];
 	ret = 0;
 	while ((c = *fmt++) != 0) {
 		if (c != '%') {
 			ret += putc(c, arg);
 			continue;
 		}
 		lflag = 0;
 		sflag = 0;
 		pad = 0;
 reswitch:	c = *fmt++;
 		switch (c) {
 		case '#':
 			sflag = 1;
 			goto reswitch;
 		case '%':
 			ret += putc('%', arg);
 			break;
 		case 'c':
 			c = va_arg(ap, int);
 			ret += putc(c, arg);
 			break;
 		case 'd':
 			if (lflag == 0) {
 				ui = (u_int)va_arg(ap, int);
 				if (ui < (int)ui) {
 					ui = -ui;
 					ret += putc('-', arg);
 				}
 				s = __uitoa(nbuf, ui, 10);
 			} else {
 				ul = (u_long)va_arg(ap, long);
 				if (ul < (long)ul) {
 					ul = -ul;
 					ret += putc('-', arg);
 				}
 				s = __ultoa(nbuf, ul, 10);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'l':
 			lflag = 1;
 			goto reswitch;
 		case 'o':
 			if (lflag == 0) {
 				ui = (u_int)va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 8);
 			} else {
 				ul = (u_long)va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 8);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'p':
 			ul = (u_long)va_arg(ap, void *);
 			s = __ultoa(nbuf, ul, 16);
 			ret += __puts("0x", putc, arg);
 			ret += __puts(s, putc, arg);
 			break;
 		case 's':
 			s = va_arg(ap, char *);
 			ret += __puts(s, putc, arg);
 			break;
 		case 'u':
 			if (lflag == 0) {
 				ui = va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 10);
 			} else {
 				ul = va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 10);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'x':
 			if (lflag == 0) {
 				ui = va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 16);
 			} else {
 				ul = va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 16);
 			}
 			if (sflag)
 				ret += __puts("0x", putc, arg);
 			ret += __puts(s, putc, arg);
 			break;
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
 			pad = pad * 10 + c - '0';
 			goto reswitch;
 		default:
 			break;
 		}
 	}
 	return (ret);
 }
 
 static int
 __sputc(char c, void *arg)
 {
 	struct sp_data *sp;
 
 	sp = arg;
 	if (sp->sp_len < sp->sp_size)
 		sp->sp_buf[sp->sp_len++] = c;
 	sp->sp_buf[sp->sp_len] = '\0';
 	return (1);
 }
 
 static int
 __puts(const char *s, putc_func_t *putc, void *arg)
 {
 	const char *p;
 	int ret;
 
 	ret = 0;
 	for (p = s; *p != '\0'; p++)
 		ret += putc(*p, arg);
 	return (ret);
 }
 
 static char *
 __uitoa(char *buf, u_int ui, int base)
 {
 	char *p;
 
 	p = buf;
 	*p = '\0';
 	do
 		*--p = digits[ui % base];
 	while ((ui /= base) != 0);
 	return (p);
 }
 
 static char *
 __ultoa(char *buf, u_long ul, int base)
 {
 	char *p;
 
 	p = buf;
 	*p = '\0';
 	do
 		*--p = digits[ul % base];
 	while ((ul /= base) != 0);
 	return (p);
 }
Index: projects/ifnet/sys/boot/efi/boot1/fat-amd64.tmpl.bz2.uu
===================================================================
--- projects/ifnet/sys/boot/efi/boot1/fat-amd64.tmpl.bz2.uu	(nonexistent)
+++ projects/ifnet/sys/boot/efi/boot1/fat-amd64.tmpl.bz2.uu	(revision 281173)
@@ -0,0 +1,20 @@
+FAT template boot filesystem created by generate-fat.sh
+DO NOT EDIT
+$FreeBSD$
+begin 644 fat-amd64.tmpl.bz2
+M0EIH.3%!629362AK*D(`&I+____[ZZKJZ_^N_ZO^Z_Z_OJ[L`4`!7I0$#&$"
+M0$!$3&(<P`(;J*C:0E0E#30&AH`T````9#0```9````#)ZF0:,-3U/409,`)
+M@`"8`C3",````$R:8F@P`C`````"24U,D>I-DTU,)ZAZ0VA-!M0T'J`>H#"9
+M'I#0-H&HQI&0&3&FH>H>*`JHHU3V]1%/4/2``T#0`!H``#0`````#1H,@``6
+M'1&G'&@?$6[T#A)?X8$A160"20BO#")0J4TB1*4GXF$B4I,&>43+=_?K=#3*
+M6]<E0HE`UBF?(J%8BRF#?8OQ2'D)`)(EL2;F4.'R>R"ZNKJZI,9*68E8*E2Q
+M4J5*E3'(1830A"$(12A-"<(0A#]VD)H0A"$,>I0FA"$(0I\>P^=F5:M6K5JU
+M:DI3:64UN;[7%5B]Y-^\]@_K@B:N\/,5F%&H<\G#IXQXAEFC&D?![6%0'6MR
+MX1@@%FC"FD`M7,/SXFNG:2`'-0<-C$8^+$N.7M1B,^6)9,DV9,0A\OL<:C"L
+ML1V&,<\9YRB>XV#BG")'6NKRK^("UF2XO?_L!#29">MGDF$R3).!PX&%E,4C
+M''=(FL1.`_3?CN@-IB2PI3!FF\<8X.X@D,>CA90I)#M$XRPNDFJELL<3=1?8
+M2B7\5Z64,!7Z;EEBW-MXN-4IJ@W$462]-*\YCR,-B,5[W?=3&L/U>SX,WV#\
+M\B`:I"'0Z)5"$1B.E)(K[5I4RS`%R$>Y\D0NR*,;<9CZ:^V3P(I?D<D#!UC)
+D^M-HEE3SAN-8O0FQ$(`$(DF`?ZQ]'U2F_XNY(IPH2!0UE2$`
+`
+end

Property changes on: projects/ifnet/sys/boot/efi/boot1/fat-amd64.tmpl.bz2.uu
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/boot1/fat-arm.tmpl.bz2.uu
===================================================================
--- projects/ifnet/sys/boot/efi/boot1/fat-arm.tmpl.bz2.uu	(nonexistent)
+++ projects/ifnet/sys/boot/efi/boot1/fat-arm.tmpl.bz2.uu	(revision 281173)
@@ -0,0 +1,26 @@
+FAT template boot filesystem created by generate-fat.sh
+DO NOT EDIT
+$FreeBSD$
+begin 644 fat-arm.tmpl.bz2
+M0EIH.3%!629365NH-?4`&T!_____ZZ[J[_ZN_ZO^J_Z[OJ_^J^J[^KZNKNNJ
+MZNKNZOJ^P`+\#$``0`&AD:,@TTTT-&C30#$R&FF1H:!B&)D&F@```-&AB::#
+M1HP0-,AD`T8F(TTP2JII&?^I5/]4`@TTT-&(T8FAB:&@T:8F(Q!@!`9,1DR9
+M`-,$:,FC)B&FC"8"#3"#(-,0`&AD:,@TTTT-&C30#$R&FF1H:!B&)D&F@```
+M-&AB::#1HP0-,AD`T8F(TTP*HI)D\DFDVC0AIIH]3(:-``80>HTTR!IH::#$
+M-`Q`:`/2:::&)IDP"/0$T:>IFD]$R8---3HUK2<PNK%<6\J]BA",-*(A%:0B
+M(B#G5%F8B$$(68C_:!`A#OL<HAB+JZ6UHRRU>*K9].C:!IWS-2UK9M<WC]W[
+M+]QW,9%V2,?<"ZEO9B("$,I.0ZFE66K/,<N6+8ITS$J3))F2I4HJ5*E2I74J
+M:$J5*E2I4JE5*I*I-2I4J5.;84I4J5*E2I6ZU4Q*E2I4J5+X<SOK,65E965D
+MT:($(0A"$(0A"$(0C>.3@8,&#!@P8(0A"$(0@0A"$(0[;_L-&C1HT:-&B+YP
+M,&#!@P8,$"$(0A"-?NW$*YY:V9IQ,:B93+AX^A7B),5HN_4JV=2\Y,:-+W'Q
+MKQKVU7KA+YR'.:*V#48N-"7<`%:TT4D`/;N;SZM9X,V(@!D'=P==+O)9*\H8
+MI8W<L9:.AU[N;G"QEHLZBWB/2B.SKCGRM):%ZK3-2U3ZV1;%MUZX:^?X_Y@N
+MM=0SN1R7Z\&PN,I8VVWKMS$1-X41%"V)-;+V9:MI5;.+M*TMZ]K7HQ-ALY1\
+M4LG)\#5/I7#7-D_1<KUPE\OW)<YEW=.GMJ%$MUF)TE9N)8[M[6LIEXF@H6?S
+MW%U89M5M+:LW6(\?7Z6I:U4F>IM*Q<2E)KFG;%M&U;INV]<)Q%^P'*<]T6R;
+M^7`P.+I+N(HA%=#(^Q0WVV0]=$2=5)>-XWKW7!95E7$<5QW)<IS6"PF@7&+H
+M&<ETI.`9F48V7/E&??ROG%<9FU^Y#E.8W+GL%]#L7+U=^CY91Q\+`K'-WPF4
+M.2S[EM@S38.8YKG.@P6_83I,-<G1DPY84N-)VMWK,/!;AIJ&3E%&7B`0A,`&
+3'65I7J6`B^?^+N2*<*$@MU!KZ@``
+`
+end

Property changes on: projects/ifnet/sys/boot/efi/boot1/fat-arm.tmpl.bz2.uu
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/boot1/generate-fat.sh
===================================================================
--- projects/ifnet/sys/boot/efi/boot1/generate-fat.sh	(revision 281172)
+++ projects/ifnet/sys/boot/efi/boot1/generate-fat.sh	(revision 281173)
@@ -1,66 +1,68 @@
 #!/bin/sh
 
 # This script generates the dummy FAT filesystem used for the EFI boot
 # blocks. It uses newfs_msdos to generate a template filesystem with the
 # relevant interesting files. These are then found by grep, and the offsets
 # written to a Makefile snippet.
 #
 # Because it requires root, and because it is overkill, we do not
 # do this as part of the normal build. If makefs(8) grows workable FAT
 # support, this should be revisited.
 
 # $FreeBSD$
 
 FAT_SIZE=1600 			#Size in 512-byte blocks of the produced image
 
 BOOT1_SIZE=128k
 
 #
 # Known filenames
-# amd64: BOOTx64.efi
-# arm64: BOOTaa64.efi
+# amd64:   BOOTx64.efi
+# aarch64: BOOTaa64.efi
+# arm:     BOOTarm.efi
 #
-if [ -z "$1" ]; then
-	echo "Usage: $0 filename"
+if [ -z "$2" ]; then
+	echo "Usage: $0 arch boot-filename"
 	exit 1
 fi
 
-FILENAME=$1
+ARCH=$1
+FILENAME=$2
 
 # Generate 800K FAT image
-OUTPUT_FILE=fat.tmpl
+OUTPUT_FILE=fat-${ARCH}.tmpl
 
 dd if=/dev/zero of=$OUTPUT_FILE bs=512 count=$FAT_SIZE
 DEVICE=`mdconfig -a -f $OUTPUT_FILE`
 newfs_msdos -F 12 -L EFI $DEVICE
 mkdir stub
 mount -t msdosfs /dev/$DEVICE stub
 
 # Create and bless a directory for the boot loader
 mkdir -p stub/efi/boot
 
 # Make a dummy file for boot1
 echo 'Boot1 START' | dd of=stub/efi/boot/$FILENAME cbs=$BOOT1_SIZE count=1 conv=block
 
 umount stub
 mdconfig -d -u $DEVICE
 rmdir stub
 
 # Locate the offset of the fake file
 BOOT1_OFFSET=$(hd $OUTPUT_FILE | grep 'Boot1 START' | cut -f 1 -d ' ')
 
 # Convert to number of blocks
 BOOT1_OFFSET=$(echo 0x$BOOT1_OFFSET | awk '{printf("%x\n",$1/512);}')
 
 echo '# This file autogenerated by generate-fat.sh - DO NOT EDIT' > Makefile.fat
 echo '# $FreeBSD$' >> Makefile.fat
 echo "BOOT1_OFFSET=0x$BOOT1_OFFSET" >> Makefile.fat
 
 bzip2 $OUTPUT_FILE
 echo 'FAT template boot filesystem created by generate-fat.sh' > $OUTPUT_FILE.bz2.uu
 echo 'DO NOT EDIT' >> $OUTPUT_FILE.bz2.uu
 echo '$FreeBSD$' >> $OUTPUT_FILE.bz2.uu
 
 uuencode $OUTPUT_FILE.bz2 $OUTPUT_FILE.bz2 >> $OUTPUT_FILE.bz2.uu
 rm $OUTPUT_FILE.bz2
 
Index: projects/ifnet/sys/boot/efi/include/arm/efibind.h
===================================================================
--- projects/ifnet/sys/boot/efi/include/arm/efibind.h	(nonexistent)
+++ projects/ifnet/sys/boot/efi/include/arm/efibind.h	(revision 281173)
@@ -0,0 +1,165 @@
+/* $FreeBSD$ */
+/*++
+
+Copyright (c) 2004 - 2012, Intel Corporation. All rights reserved.
+
+This program and the accompanying materials
+are licensed and made available under the terms and conditions of the BSD License
+which accompanies this distribution.  The full text of the license may be found at
+http://opensource.org/licenses/bsd-license.php
+
+THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+Module Name:
+
+  EfiBind.h
+
+Abstract:
+
+  Processor or Compiler specific defines and types for 32-bit ARM.
+  We are using the ANSI C 2000 _t type definitions for basic types.
+  This is technically a violation of the coding standard, but they
+  are used to make EfiTypes.h portable. Code other than EfiTypes.h
+  should never use any ANSI C 2000 _t integer types.
+
+--*/
+
+#ifndef _EFI_BIND_H_
+#define _EFI_BIND_H_
+
+
+#define EFI_DRIVER_ENTRY_POINT(InitFunction)
+#define EFI_APPLICATION_ENTRY_POINT EFI_DRIVER_ENTRY_POINT
+
+
+//
+// Make sure we are using the correct packing rules per EFI specification
+//
+#ifndef __GNUC__
+#pragma pack()
+#endif
+
+
+#ifdef __FreeBSD__
+#include <sys/stdint.h>
+#else
+//
+// Assume standard IA-32 alignment.
+// BugBug: Need to check portability of long long
+//
+typedef unsigned long long  uint64_t;
+typedef long long           int64_t;
+typedef unsigned int        uint32_t;
+typedef int                 int32_t;
+typedef unsigned short      uint16_t;
+typedef short               int16_t;
+typedef unsigned char       uint8_t;
+typedef signed char         int8_t;
+#endif
+
+typedef uint64_t   UINT64;
+typedef int64_t    INT64;
+typedef uint32_t   UINT32;
+typedef int32_t    INT32;
+typedef uint16_t   UINT16;
+typedef int16_t    INT16;
+typedef uint8_t    UINT8;
+typedef int8_t     INT8;
+
+#undef VOID
+#define VOID    void
+
+//
+// Native integer size in stdint.h
+//
+typedef uint32_t  UINTN;
+typedef int32_t   INTN;
+
+#define EFIERR(a)           (0x80000000 | a)
+#define EFI_ERROR_MASK      0x80000000
+#define EFIERR_OEM(a)       (0xc0000000 | a)
+
+//
+// Processor specific defines
+//
+#define EFI_MAX_BIT       0x80000000
+#define MAX_2_BITS        0xC0000000
+
+//
+// Maximum legal 32-bit address
+//
+#define EFI_MAX_ADDRESS   0xFFFFFFFF
+
+//
+//  Bad pointer value to use in check builds.
+//  if you see this value you are using uninitialized or freed data
+//
+#define EFI_BAD_POINTER          0xAFAFAFAF
+#define EFI_BAD_POINTER_AS_BYTE  0xAF
+
+#define EFI_DEADLOOP()    { volatile UINTN __iii; __iii = 1; while (__iii); }
+
+//
+// Inject a break point in the code to assist debugging for NT Emulation Environment
+// For real hardware, just put in a halt loop. Don't do a while(1) because the
+// compiler will optimize away the rest of the function following, so that you run out in
+// the weeds if you skip over it with a debugger.
+//
+#define EFI_BREAKPOINT EFI_DEADLOOP()
+
+
+//
+// Memory Fence forces serialization, and is needed to support out of order
+//  memory transactions. The Memory Fence is mainly used to make sure IO
+//  transactions complete in a deterministic sequence, and to synchronize locks
+//  and other MP code. Currently no memory fencing is required.
+//
+#define MEMORY_FENCE()
+
+//
+// Some compilers don't support the forward reference construct:
+//  typedef struct XXXXX. The forward reference is required for
+//  ANSI compatibility.
+//
+// The following macro provides a workaround for such cases.
+//
+
+
+#ifdef EFI_NO_INTERFACE_DECL
+  #define EFI_FORWARD_DECLARATION(x)
+#else
+  #define EFI_FORWARD_DECLARATION(x) typedef struct _##x x
+#endif
+
+
+//
+// Some C compilers optimize the calling conventions to increase performance.
+// EFIAPI is used to make all public APIs follow the standard C calling
+// convention.
+//
+#define EFIAPI
+
+
+
+//
+// For symbol name in GNU assembly code, an extra "_" is necessary
+//
+#if defined(__GNUC__)
+  ///
+  /// Private worker functions for ASM_PFX()
+  ///
+  #define _CONCATENATE(a, b)  __CONCATENATE(a, b)
+  #define __CONCATENATE(a, b) a ## b
+
+  ///
+  /// The __USER_LABEL_PREFIX__ macro predefined by GNUC represents the prefix
+  /// on symbols in assembly language.
+  ///
+  #define ASM_PFX(name) _CONCATENATE (__USER_LABEL_PREFIX__, name)
+
+#endif
+
+#define INTERFACE_DECL(x) struct x
+
+#endif

Property changes on: projects/ifnet/sys/boot/efi/include/arm/efibind.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/loader/Makefile
===================================================================
--- projects/ifnet/sys/boot/efi/loader/Makefile	(revision 281172)
+++ projects/ifnet/sys/boot/efi/loader/Makefile	(revision 281173)
@@ -1,120 +1,122 @@
 # $FreeBSD$
 
 MAN=
 
 .include <src.opts.mk>
 
 # In-tree GCC does not support __attribute__((ms_abi)).
 .if ${COMPILER_TYPE} != "gcc"
 
 MK_SSP=		no
 
 PROG=		loader.sym
 INTERNALPROG=
 
 .PATH: ${.CURDIR}/../../efi/loader
 # architecture-specific loader code
 SRCS=	autoload.c \
 	bootinfo.c \
 	conf.c \
 	copy.c \
 	devicename.c \
 	main.c \
 	smbios.c \
 	vers.c
 
 .PATH: ${.CURDIR}/arch/${MACHINE_CPUARCH}
 # For smbios.c
 .PATH: ${.CURDIR}/../../i386/libi386
 .include "${.CURDIR}/arch/${MACHINE_CPUARCH}/Makefile.inc"
 
 CFLAGS+=	-fPIC
 CFLAGS+=	-I${.CURDIR}
 CFLAGS+=	-I${.CURDIR}/arch/${MACHINE_CPUARCH}
 CFLAGS+=	-I${.CURDIR}/../include
 CFLAGS+=	-I${.CURDIR}/../include/${MACHINE_CPUARCH}
 CFLAGS+=	-I${.CURDIR}/../../../contrib/dev/acpica/include
 CFLAGS+=	-I${.CURDIR}/../../..
 CFLAGS+=	-I${.CURDIR}/../../i386/libi386
 CFLAGS+=	-DNO_PCI -DEFI
 
 .if ${MK_FORTH} != "no"
 BOOT_FORTH=	yes
 CFLAGS+=	-DBOOT_FORTH
 CFLAGS+=	-I${.CURDIR}/../../ficl
 CFLAGS+=	-I${.CURDIR}/../../ficl/${MACHINE_CPUARCH}
 LIBFICL=	${.OBJDIR}/../../ficl/libficl.a
 .endif
 
 LOADER_FDT_SUPPORT?=	no
 .if ${MK_FDT} != "no" && ${LOADER_FDT_SUPPORT} != "no"
 CFLAGS+=	-I${.CURDIR}/../../fdt
 CFLAGS+=	-I${.OBJDIR}/../../fdt
 CFLAGS+=	-DLOADER_FDT_SUPPORT
 LIBEFI_FDT=	${.OBJDIR}/../../efi/fdt/libefi_fdt.a
 LIBFDT=		${.OBJDIR}/../../fdt/libfdt.a
 .endif
 
 # Include bcache code.
 HAVE_BCACHE=    yes
 
 .if defined(EFI_STAGING_SIZE)
 CFLAGS+=	-DEFI_STAGING_SIZE=${EFI_STAGING_SIZE}
 .endif
 
 # Always add MI sources 
 .PATH:		${.CURDIR}/../../common
 .include	"${.CURDIR}/../../common/Makefile.inc"
 CFLAGS+=	-I${.CURDIR}/../../common
 
 FILES=	loader.efi
 FILESMODE_loader.efi=	${BINMODE}
 
 LDSCRIPT=	${.CURDIR}/arch/${MACHINE_CPUARCH}/ldscript.${MACHINE_CPUARCH}
 LDFLAGS=	-Wl,-T${LDSCRIPT} -Wl,-Bsymbolic -shared -Wl,-znocombreloc
 
 CLEANFILES=	vers.c loader.efi
 
 NEWVERSWHAT=	"EFI loader" ${MACHINE_CPUARCH}
 
 vers.c:	${.CURDIR}/../../common/newvers.sh ${.CURDIR}/../../efi/loader/version
 	sh ${.CURDIR}/../../common/newvers.sh ${.CURDIR}/version ${NEWVERSWHAT}
 
 OBJCOPY?=	objcopy
 OBJDUMP?=	objdump
 
 .if ${MACHINE_CPUARCH} == "amd64"
 EFI_TARGET=	efi-app-x86_64
 .elif ${MACHINE_CPUARCH} == "i386"
 EFI_TARGET=	efi-app-ia32
+.else
+EFI_TARGET=	binary
 .endif
 
 loader.efi: loader.sym
 	if [ `${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*' | wc -l` != 0 ]; then \
 		${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*'; \
 		exit 1; \
 	fi
-	${OBJCOPY} -j .text -j .sdata -j .data \
+	${OBJCOPY} -j .peheader -j .text -j .sdata -j .data \
 		-j .dynamic -j .dynsym -j .rel.dyn \
 		-j .rela.dyn -j .reloc -j .eh_frame -j set_Xcommand_set \
 		--output-target=${EFI_TARGET} ${.ALLSRC} ${.TARGET}
 
 LIBEFI=		${.OBJDIR}/../libefi/libefi.a
 
 DPADD=		${LIBFICL} ${LIBEFI} ${LIBFDT} ${LIBEFI_FDT} ${LIBSTAND} \
 		${LDSCRIPT}
 LDADD=		${LIBFICL} ${LIBEFI} ${LIBFDT} ${LIBEFI_FDT} ${LIBSTAND}
 
 .endif # ${COMPILER_TYPE} != "gcc"
 
 .include <bsd.prog.mk>
 
 beforedepend ${OBJS}: machine x86
 
 CLEANFILES+=   machine x86
 
 machine:
 	ln -sf ${.CURDIR}/../../../amd64/include machine
 
 x86:
 	ln -sf ${.CURDIR}/../../../x86/include x86
Index: projects/ifnet/sys/boot/efi/loader/arch/arm/Makefile.inc
===================================================================
--- projects/ifnet/sys/boot/efi/loader/arch/arm/Makefile.inc	(nonexistent)
+++ projects/ifnet/sys/boot/efi/loader/arch/arm/Makefile.inc	(revision 281173)
@@ -0,0 +1,5 @@
+# $FreeBSD$
+
+SRCS+=	exec.c \
+	start.S \
+	reloc.c

Property changes on: projects/ifnet/sys/boot/efi/loader/arch/arm/Makefile.inc
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/loader/arch/arm/exec.c
===================================================================
--- projects/ifnet/sys/boot/efi/loader/arch/arm/exec.c	(nonexistent)
+++ projects/ifnet/sys/boot/efi/loader/arch/arm/exec.c	(revision 281173)
@@ -0,0 +1,107 @@
+/*-
+ * Copyright (c) 2001 Benno Rice <benno@FreeBSD.org>
+ * Copyright (c) 2007 Semihalf, Rafal Jaworowski <raj@semihalf.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker.h>
+
+#include <machine/md_var.h>
+#include <machine/metadata.h>
+#include <machine/elf.h>
+
+#include <stand.h>
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "bootstrap.h"
+#include "loader_efi.h"
+
+extern vm_offset_t md_load(char *, vm_offset_t *);
+
+/*
+ * ARM ELF load hook.  Hands filename/dest/result to __elfN(loadfile)
+ * as-is and passes that call's status back to the caller
+ * unchanged.
+ */
+int
+__elfN(arm_load)(char *filename, u_int64_t dest,
+    struct preloaded_file **result)
+{
+
+	return (__elfN(loadfile)(filename, dest, result));
+}
+
+int
+__elfN(arm_exec)(struct preloaded_file *fp)
+{
+	struct file_metadata *fmp;
+	vm_offset_t modulep, kernend;
+	Elf_Ehdr *e;
+	int error;
+	void (*entry)(void *);
+	EFI_STATUS status;
+	/* Start the kernel described by fp; this returns only on error. */
+	if ((fmp = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL)
+		return (EFTYPE);
+	/* The saved ELF header supplies the kernel's entry address. */
+	e = (Elf_Ehdr *)&fmp->md_data;
+	/* Build the boot metadata; modulep is set to where it was placed. */
+	if ((error = bi_load(fp->f_args, &modulep, &kernend)) != 0)
+		return (error);
+	/* efi_translate() offsets the entry address by stage_offset. */
+	entry = efi_translate(e->e_entry);
+	printf("Kernel entry at 0x%x...\n", (unsigned)entry);
+	printf("Kernel args: %s\n", fp->f_args);
+	printf("modulep: %#x\n", modulep);
+	printf("relocation_offset %llx\n", __elfN(relocation_offset));
+	/* Leave boot services; if that fails, report it and give up. */
+	status = BS->ExitBootServices(IH, efi_mapkey);
+	if (EFI_ERROR(status)) {
+		printf("%s: ExitBootServices() returned 0x%lx\n", __func__,
+		    (long)status);
+		return (EINVAL);
+	}
+
+	dev_cleanup();
+	/* Enter the kernel with modulep as its only argument. */
+	(*entry)((void *)modulep);
+	panic("exec returned");
+}
+
+static struct file_format arm_elf = {
+	__elfN(arm_load),
+	__elfN(arm_exec)
+};
+
+struct file_format *file_formats[] = {
+	&arm_elf,
+	NULL
+};
+

Property changes on: projects/ifnet/sys/boot/efi/loader/arch/arm/exec.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/loader/arch/arm/ldscript.arm
===================================================================
--- projects/ifnet/sys/boot/efi/loader/arch/arm/ldscript.arm	(nonexistent)
+++ projects/ifnet/sys/boot/efi/loader/arch/arm/ldscript.arm	(revision 281173)
@@ -0,0 +1,86 @@
+/* $FreeBSD$ */
+OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm",
+	      "elf32-littlearm")
+OUTPUT_ARCH(arm)
+ENTRY(_start)
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  . = 0;
+  ImageBase = .;
+  .peheader	: {
+    *(.peheader)
+  }
+  .text		: {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+    /* .gnu.warning sections are handled specially by elf32.em. */
+    *(.gnu.warning)
+    *(.gnu.linkonce.t*)
+  } =0
+  _etext = .;
+  PROVIDE (etext = .);
+  . = ALIGN(4096);
+  .data    :
+  {
+    *(.data)
+    *(.gnu.linkonce.d*)
+    *(.rodata)
+    *(.rodata.*)
+    CONSTRUCTORS
+  }
+  .data1   : { *(.data1) }
+  .got1           : { *(.got1) }
+  .dynamic        : { *(.dynamic) }
+  /* Put .ctors and .dtors next to the .got2 section, so that the pointers
+     get relocated with -mrelocatable. Also put in the .fixup pointers.
+     The current compiler no longer needs this, but keep it around for 2.7.2  */
+                PROVIDE (_GOT2_START_ = .);
+  .got2           :  { *(.got2) }
+                PROVIDE (__CTOR_LIST__ = .);
+  .ctors          : { *(.ctors) }
+                PROVIDE (__CTOR_END__ = .);
+                PROVIDE (__DTOR_LIST__ = .);
+  .dtors          : { *(.dtors) }
+                PROVIDE (__DTOR_END__ = .);
+                PROVIDE (_FIXUP_START_ = .);
+  .fixup          : { *(.fixup) }
+                PROVIDE (_FIXUP_END_ = .);
+                PROVIDE (_GOT2_END_ = .);
+                PROVIDE (_GOT_START_ = .);
+  .got            : { *(.got) }
+  .got.plt        : { *(.got.plt) }
+                PROVIDE (_GOT_END_ = .);
+  /* We want the small data sections together, so single-instruction offsets
+     can access them all, and initialized data all before uninitialized, so
+     we can shorten the on-disk segment size.  */
+  .sdata     : { *(.sdata) }
+  _edata  =  .;
+  PROVIDE (edata = .);
+  set_Xcommand_set	: {
+    __start_set_Xcommand_set = .;
+    *(set_Xcommand_set)
+    __stop_set_Xcommand_set = .;
+  }
+  __gp = .;
+   PROVIDE (__bss_start = .);
+  .sbss      :
+  {
+    *(.sbss)
+    *(.scommon)
+    *(.dynsbss)
+  }
+  .bss       :
+  {
+   *(.dynbss)
+   *(.bss)
+   *(COMMON)
+  }
+   PROVIDE (__bss_end = .);
+  .plt   : { *(.plt) }
+  .dynamic	: { *(.dynamic) }
+  .reloc	: { *(.reloc) }
+  .hash		: { *(.hash) }
+  .dynsym	: { *(.dynsym) }
+  .dynstr	: { *(.dynstr) }
+  _edata = .;
+}

Property changes on: projects/ifnet/sys/boot/efi/loader/arch/arm/ldscript.arm
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/loader/arch/arm/reloc.c
===================================================================
--- projects/ifnet/sys/boot/efi/loader/arch/arm/reloc.c	(nonexistent)
+++ projects/ifnet/sys/boot/efi/loader/arch/arm/reloc.c	(revision 281173)
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2008-2010 Rui Paulo <rpaulo@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <elf.h>
+#include <bootstrap.h>
+
+/*
+ * A simple relocator for ARM binaries; only R_ARM_RELATIVE is applied.
+ */
+void
+_reloc(unsigned long ImageBase, Elf32_Dyn *dynamic)
+{
+	unsigned long relsz, relent;
+	unsigned long *newaddr;
+	Elf32_Rel *rel;
+	Elf32_Dyn *dynp;
+
+	/* Find the relocation table, its total size and its entry size. */
+	rel = 0;
+	relsz = 0;
+	relent = 0;
+	for (dynp = dynamic; dynp->d_tag != DT_NULL; dynp++) {
+		switch (dynp->d_tag) {
+		case DT_REL:
+			rel = (Elf32_Rel *)(ImageBase + dynp->d_un.d_ptr);
+			break;
+		case DT_RELSZ:
+			relsz = dynp->d_un.d_val;
+			break;
+		case DT_RELENT:
+			relent = dynp->d_un.d_val;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* No table to walk, or an entry size that would never advance. */
+	if (rel == 0 || relent == 0)
+		return;
+
+	/* Whole entries only, so the unsigned relsz cannot wrap past zero. */
+	for (; relsz >= relent; relsz -= relent) {
+		switch (ELF32_R_TYPE(rel->r_info)) {
+		case R_ARM_RELATIVE:
+			/* Address relative to the base address. */
+			newaddr = (unsigned long *)(ImageBase + rel->r_offset);
+			*newaddr += ImageBase;
+			break;
+		default:
+			/* XXX: do we need other relocations ? */
+			break;
+		}
+		rel = (Elf32_Rel *) ((caddr_t) rel + relent);
+	}
+}

Property changes on: projects/ifnet/sys/boot/efi/loader/arch/arm/reloc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/loader/arch/arm/start.S
===================================================================
--- projects/ifnet/sys/boot/efi/loader/arch/arm/start.S	(nonexistent)
+++ projects/ifnet/sys/boot/efi/loader/arch/arm/start.S	(revision 281173)
@@ -0,0 +1,190 @@
+/*-
+ * Copyright (c) 2014, 2015 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asm.h>
+
+/*
+ * We need to be a PE32 file for EFI. On some architectures we can use
+ * objcopy to create the correct file, however on arm we need to do
+ * it ourselves.
+ */
+
+#define	IMAGE_FILE_MACHINE_ARM		0x01c2
+
+#define	IMAGE_SCN_CNT_CODE		0x00000020
+#define	IMAGE_SCN_CNT_INITIALIZED_DATA	0x00000040
+#define	IMAGE_SCN_MEM_DISCARDABLE	0x02000000
+#define	IMAGE_SCN_MEM_EXECUTE		0x20000000
+#define	IMAGE_SCN_MEM_READ		0x40000000
+
+	.section .peheader
+efi_start:
+	/* The MS-DOS Stub, only used to get the offset of the COFF header */
+	.ascii	"MZ"
+	.short	0
+	.space	0x38
+	.long	pe_sig - efi_start
+
+	/* The PE32 Signature. Needs to be 8-byte aligned */
+	.align	3
+pe_sig:
+	.ascii	"PE"
+	.short	0
+coff_head:
+	.short	IMAGE_FILE_MACHINE_ARM		/* ARM file */
+	.short	2				/* 2 Sections */
+	.long	0				/* Timestamp */
+	.long	0				/* No symbol table */
+	.long	0				/* No symbols */
+	.short	section_table - optional_header	/* Optional header size */
+	.short	0	/* Characteristics TODO: Fill in */
+
+optional_header:
+	.short	0x010b				/* PE32 (32-bit addressing) */
+	.byte	0				/* Major linker version */
+	.byte	0				/* Minor linker version */
+	.long	_edata - _end_header		/* Code size */
+	.long	0				/* No initialized data */
+	.long	0				/* No uninitialized data */
+	.long	_start - efi_start		/* Entry point */
+	.long	_end_header - efi_start		/* Start of code */
+	.long	0				/* Start of data */
+
+optional_windows_header:
+	.long	0				/* Image base */
+	.long	32				/* Section Alignment */
+	.long	8				/* File alignment */
+	.short	0				/* Major OS version */
+	.short	0				/* Minor OS version */
+	.short	0				/* Major image version */
+	.short	0				/* Minor image version */
+	.short	0				/* Major subsystem version */
+	.short	0				/* Minor subsystem version */
+	.long	0				/* Win32 version */
+	.long	_edata - efi_start		/* Image size */
+	.long	_end_header - efi_start		/* Header size */
+	.long	0				/* Checksum */
+	.short	0xa				/* Subsystem (EFI app) */
+	.short	0				/* DLL Characteristics */
+	.long	0				/* Stack reserve */
+	.long	0				/* Stack commit */
+	.long	0				/* Heap reserve */
+	.long	0				/* Heap commit */
+	.long	0				/* Loader flags */
+	.long	6				/* Number of RVAs */
+
+	/* RVAs: */
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+section_table:
+	/* We need a .reloc section for EFI */
+	.ascii	".reloc"
+	.byte	0
+	.byte	0				/* Pad to 8 bytes */
+	.long	0				/* Virtual size */
+	.long	0				/* Virtual address */
+	.long	0				/* Size of raw data */
+	.long	0				/* Pointer to raw data */
+	.long	0				/* Pointer to relocations */
+	.long	0				/* Pointer to line numbers */
+	.short	0				/* Number of relocations */
+	.short	0				/* Number of line numbers */
+	.long	(IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | \
+		 IMAGE_SCN_MEM_DISCARDABLE)	/* Characteristics */
+
+	/* The contents of the loader */
+	.ascii	".text"
+	.byte	0
+	.byte	0
+	.byte	0				/* Pad to 8 bytes */
+	.long	_edata - _end_header		/* Virtual size */
+	.long	_end_header - efi_start		/* Virtual address */
+	.long	_edata - _end_header		/* Size of raw data */
+	.long	_end_header - efi_start		/* Pointer to raw data */
+	.long	0				/* Pointer to relocations */
+	.long	0				/* Pointer to line numbers */
+	.short	0				/* Number of relocations */
+	.short	0				/* Number of line numbers */
+	.long	(IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | \
+		 IMAGE_SCN_MEM_READ)		/* Characteristics */
+_end_header:
+
+	.text
+_start:
+	/* Save the boot params to the stack */
+	push	{r0, r1}
+
+	adr	r0, .Lbase		/* r0 = run-time address of .Lbase */
+	ldr	r1, [r0]		/* r1 = link-time address of .Lbase */
+	sub	r5, r0, r1		/* r5 = load offset */
+
+	ldr	r0, .Limagebase
+	add	r0, r0, r5		/* arg 0: run-time ImageBase */
+	ldr	r1, .Ldynamic
+	add	r1, r1, r5		/* arg 1: run-time _DYNAMIC */
+
+	bl	_C_LABEL(_reloc)
+
+	/* Zero the BSS, _reloc fixed the values for us */
+	ldr	r0, .Lbss
+	ldr	r1, .Lbssend
+	mov	r2, #0
+
+1:	cmp	r0, r1
+	bhs	2f			/* unsigned; stop AT __bss_end */
+	str	r2, [r0], #4
+	b	1b
+2:
+
+	pop	{r0, r1}
+	bl	_C_LABEL(efi_main)
+
+1:	WFI				/* efi_main returned: idle forever */
+	b	1b
+
+.Lbase:
+	.word	.
+.Limagebase:
+	.word	ImageBase
+.Ldynamic:
+	.word	_DYNAMIC
+.Lbss:
+	.word	__bss_start
+.Lbssend:
+	.word	__bss_end
+
+.align	3
+stack:
+	.space 512
+stack_end:
+

Property changes on: projects/ifnet/sys/boot/efi/loader/arch/arm/start.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/ifnet/sys/boot/efi/loader/bootinfo.c
===================================================================
--- projects/ifnet/sys/boot/efi/loader/bootinfo.c	(revision 281172)
+++ projects/ifnet/sys/boot/efi/loader/bootinfo.c	(revision 281173)
@@ -1,412 +1,446 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2004, 2006 Marcel Moolenaar
  * Copyright (c) 2014 The FreeBSD Foundation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <string.h>
 #include <sys/param.h>
 #include <sys/reboot.h>
 #include <sys/linker.h>
 #include <sys/boot.h>
 #include <machine/cpufunc.h>
 #include <machine/elf.h>
 #include <machine/metadata.h>
 #include <machine/psl.h>
 
 #include <efi.h>
 #include <efilib.h>
 
 #include "bootstrap.h"
 #include "loader_efi.h"
 
 #if defined(__amd64__) || defined(__i386__)
 #include <machine/specialreg.h>
 #include "framebuffer.h"
 #endif
 
 #if defined(LOADER_FDT_SUPPORT)
 #include <fdt_platform.h>
 #endif
 
 UINTN efi_mapkey;
 
 static const char howto_switches[] = "aCdrgDmphsv";
 static int howto_masks[] = {
 	RB_ASKNAME, RB_CDROM, RB_KDB, RB_DFLTROOT, RB_GDB, RB_MULTIPLE,
 	RB_MUTE, RB_PAUSE, RB_SERIAL, RB_SINGLE, RB_VERBOSE
 };
 
 static int
 bi_getboothowto(char *kargs)
 {
 	const char *sw;
 	char *opts;
 	char *console;
 	int howto, i;
 
 	howto = 0;
 
 	/* Get the boot options from the environment first. */
 	for (i = 0; howto_names[i].ev != NULL; i++) {
 		if (getenv(howto_names[i].ev) != NULL)
 			howto |= howto_names[i].mask;
 	}
 
 	console = getenv("console");
 	if (console != NULL) {
 		if (strcmp(console, "comconsole") == 0)
 			howto |= RB_SERIAL;
 		if (strcmp(console, "nullconsole") == 0)
 			howto |= RB_MUTE;
 	}
 
 	/* Parse kargs */
 	if (kargs == NULL)
 		return (howto);
 
 	opts = strchr(kargs, '-');
 	while (opts != NULL) {
 		while (*(++opts) != '\0') {
 			sw = strchr(howto_switches, *opts);
 			if (sw == NULL)
 				break;
 			howto |= howto_masks[sw - howto_switches];
 		}
 		opts = strchr(opts, '-');
 	}
 
 	return (howto);
 }
 
 /*
  * Copy the environment into the load area starting at (addr).
  * Each variable is formatted as <name>=<value>, with a single nul
  * separating each variable, and a double nul terminating the environment.
  */
 static vm_offset_t
 bi_copyenv(vm_offset_t start)
 {
 	struct env_var *ep;
 	vm_offset_t addr, last;
 	size_t len;
 
 	addr = last = start;
 
 	/* Traverse the environment. */
 	for (ep = environ; ep != NULL; ep = ep->ev_next) {
 		len = strlen(ep->ev_name);
 		if (archsw.arch_copyin(ep->ev_name, addr, len) != len)
 			break;
 		addr += len;
 		if (archsw.arch_copyin("=", addr, 1) != 1)
 			break;
 		addr++;
 		if (ep->ev_value != NULL) {
 			len = strlen(ep->ev_value);
 			if (archsw.arch_copyin(ep->ev_value, addr, len) != len)
 				break;
 			addr += len;
 		}
 		if (archsw.arch_copyin("", addr, 1) != 1)
 			break;
 		last = ++addr;
 	}
 
 	if (archsw.arch_copyin("", last++, 1) != 1)
 		last = start;
 	return(last);
 }
 
 /*
  * Copy module-related data into the load area, where it can be
  * used as a directory for loaded modules.
  *
  * Module data is presented in a self-describing format.  Each datum
  * is preceded by a 32-bit identifier and a 32-bit size field.
  *
  * Currently, the following data are saved:
  *
  * MOD_NAME	(variable)		module name (string)
  * MOD_TYPE	(variable)		module type (string)
  * MOD_ARGS	(variable)		module parameters (string)
  * MOD_ADDR	sizeof(vm_offset_t)	module load address
  * MOD_SIZE	sizeof(size_t)		module size
  * MOD_METADATA	(variable)		type-specific metadata
  */
 #define	COPY32(v, a, c) {					\
 	uint32_t x = (v);					\
 	if (c)							\
 		archsw.arch_copyin(&x, a, sizeof(x));		\
 	a += sizeof(x);						\
 }
 
 #define	MOD_STR(t, a, s, c) {					\
 	COPY32(t, a, c);					\
 	COPY32(strlen(s) + 1, a, c);				\
 	if (c)							\
 		archsw.arch_copyin(s, a, strlen(s) + 1);	\
 	a += roundup(strlen(s) + 1, sizeof(u_long));		\
 }
 
 #define	MOD_NAME(a, s, c)	MOD_STR(MODINFO_NAME, a, s, c)
 #define	MOD_TYPE(a, s, c)	MOD_STR(MODINFO_TYPE, a, s, c)
 #define	MOD_ARGS(a, s, c)	MOD_STR(MODINFO_ARGS, a, s, c)
 
 #define	MOD_VAR(t, a, s, c) {					\
 	COPY32(t, a, c);					\
 	COPY32(sizeof(s), a, c);				\
 	if (c)							\
 		archsw.arch_copyin(&s, a, sizeof(s));		\
 	a += roundup(sizeof(s), sizeof(u_long));		\
 }
 
 #define	MOD_ADDR(a, s, c)	MOD_VAR(MODINFO_ADDR, a, s, c)
 #define	MOD_SIZE(a, s, c)	MOD_VAR(MODINFO_SIZE, a, s, c)
 
 #define	MOD_METADATA(a, mm, c) {				\
 	COPY32(MODINFO_METADATA | mm->md_type, a, c);		\
 	COPY32(mm->md_size, a, c);				\
 	if (c)							\
 		archsw.arch_copyin(mm->md_data, a, mm->md_size);	\
 	a += roundup(mm->md_size, sizeof(u_long));		\
 }
 
 #define	MOD_END(a, c) {						\
 	COPY32(MODINFO_END, a, c);				\
 	COPY32(0, a, c);					\
 }
 
 static vm_offset_t
 bi_copymodules(vm_offset_t addr)
 {
 	struct preloaded_file *fp;
 	struct file_metadata *md;
 	int c;
 	uint64_t v;
 
 	c = addr != 0;
 	/* Start with the first module on the list, should be the kernel. */
 	for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) {
 		MOD_NAME(addr, fp->f_name, c); /* This must come first. */
 		MOD_TYPE(addr, fp->f_type, c);
 		if (fp->f_args)
 			MOD_ARGS(addr, fp->f_args, c);
 		v = fp->f_addr;
+#if defined(__arm__)
+		v -= __elfN(relocation_offset);
+#endif
 		MOD_ADDR(addr, v, c);
 		v = fp->f_size;
 		MOD_SIZE(addr, v, c);
 		for (md = fp->f_metadata; md != NULL; md = md->md_next)
 			if (!(md->md_type & MODINFOMD_NOCOPY))
 				MOD_METADATA(addr, md, c);
 	}
 	MOD_END(addr, c);
 	return(addr);
 }
 
 static int
 bi_load_efi_data(struct preloaded_file *kfp)
 {
 	EFI_MEMORY_DESCRIPTOR *mm;
 	EFI_PHYSICAL_ADDRESS addr;
 	EFI_STATUS status;
 	size_t efisz;
 	UINTN mmsz, pages, sz;
 	UINT32 mmver;
 	struct efi_map_header *efihdr;
 
 #if defined(__amd64__) || defined(__i386__)
 	struct efi_fb efifb;
 
 	if (efi_find_framebuffer(&efifb) == 0) {
 		printf("EFI framebuffer information:\n");
 		printf("addr, size     0x%lx, 0x%lx\n", efifb.fb_addr,
 		    efifb.fb_size);
 		printf("dimensions     %d x %d\n", efifb.fb_width,
 		    efifb.fb_height);
 		printf("stride         %d\n", efifb.fb_stride);
 		printf("masks          0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
 		    efifb.fb_mask_red, efifb.fb_mask_green, efifb.fb_mask_blue,
 		    efifb.fb_mask_reserved);
 
 		file_addmetadata(kfp, MODINFOMD_EFI_FB, sizeof(efifb), &efifb);
 	}
 #endif
 
 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
 
 	/*
 	 * Allocate enough pages to hold the bootinfo block and the memory
 	 * map EFI will return to us. The memory map has an unknown size,
 	 * so we have to determine that first. Note that the AllocatePages
 	 * call can itself modify the memory map, so we have to take that
 	 * into account as well. The changes to the memory map are caused
 	 * by splitting a range of free memory into two (AFAICT), so that
 	 * one is marked as being loader data.
 	 */
 	sz = 0;
 	BS->GetMemoryMap(&sz, NULL, &efi_mapkey, &mmsz, &mmver);
 	sz += mmsz;
 	sz = (sz + 0xf) & ~0xf;
 	pages = EFI_SIZE_TO_PAGES(sz + efisz);
 	status = BS->AllocatePages(AllocateAnyPages, EfiLoaderData, pages,
 	    &addr);
 	if (EFI_ERROR(status)) {
 		printf("%s: AllocatePages() returned 0x%lx\n", __func__,
 		    (long)status);
 		return (ENOMEM);
 	}
 
 	/*
 	 * Read the memory map and stash it after bootinfo. Align the
 	 * memory map on a 16-byte boundary (the bootinfo block is page
 	 * aligned).
 	 */
 	efihdr = (struct efi_map_header *)addr;
 	mm = (void *)((uint8_t *)efihdr + efisz);
 	sz = (EFI_PAGE_SIZE * pages) - efisz;
 	status = BS->GetMemoryMap(&sz, mm, &efi_mapkey, &mmsz, &mmver);
 	if (EFI_ERROR(status)) {
 		printf("%s: GetMemoryMap() returned 0x%lx\n", __func__,
 		    (long)status);
 		return (EINVAL);
 	}
 
 	efihdr->memory_size = sz;
 	efihdr->descriptor_size = mmsz;
 	efihdr->descriptor_version = mmver;
 
 	file_addmetadata(kfp, MODINFOMD_EFI_MAP, efisz + sz, efihdr);
 
 	return (0);
 }
 
 /*
  * Load the information expected by an amd64 kernel.
  *
  * - The 'boothowto' argument is constructed.
  * - The 'bootdev' argument is constructed.
  * - The 'bootinfo' struct is constructed, and copied into the kernel space.
  * - The kernel environment is copied into kernel space.
  * - Module metadata are formatted and placed in kernel space.
  */
 int
 bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp)
 {
 	struct preloaded_file *xp, *kfp;
 	struct devdesc *rootdev;
 	struct file_metadata *md;
 	vm_offset_t addr;
 	uint64_t kernend;
 	uint64_t envp;
 	vm_offset_t size;
 	char *rootdevname;
 	int howto;
 #if defined(LOADER_FDT_SUPPORT)
 	vm_offset_t dtbp;
 	int dtb_size;
 #endif
+#if defined(__arm__)
+	vm_offset_t vaddr;
+	int i;
+	/*
+	 * These metadata addresses must be converted for the kernel after
+	 * relocation.
+	 */
+	uint32_t		mdt[] = {
+	    MODINFOMD_SSYM, MODINFOMD_ESYM, MODINFOMD_KERNEND,
+	    MODINFOMD_ENVP,
+#if defined(LOADER_FDT_SUPPORT)
+	    MODINFOMD_DTBP
+#endif
+	};
+#endif
 
 	howto = bi_getboothowto(args);
 
 	/*
 	 * Allow the environment variable 'rootdev' to override the supplied
 	 * device. This should perhaps go to MI code and/or have $rootdev
 	 * tested/set by MI code before launching the kernel.
 	 */
 	rootdevname = getenv("rootdev");
 	archsw.arch_getdev((void**)(&rootdev), rootdevname, NULL);
 	if (rootdev == NULL) {
 		printf("Can't determine root device.\n");
 		return(EINVAL);
 	}
 
 	/* Try reading the /etc/fstab file to select the root device */
 	getrootmount(efi_fmtdev((void *)rootdev));
 
 	addr = 0;
 	for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
 		if (addr < (xp->f_addr + xp->f_size))
 			addr = xp->f_addr + xp->f_size;
 	}
 
 	/* Pad to a page boundary. */
 	addr = roundup(addr, PAGE_SIZE);
 
 	/* Copy our environment. */
 	envp = addr;
 	addr = bi_copyenv(addr);
 
 	/* Pad to a page boundary. */
 	addr = roundup(addr, PAGE_SIZE);
 
 #if defined(LOADER_FDT_SUPPORT)
 	/* Handle device tree blob */
 	dtbp = addr;
 	dtb_size = fdt_copy(addr);
 		
 	/* Pad to a page boundary */
 	if (dtb_size)
 		addr += roundup(dtb_size, PAGE_SIZE);
 #endif
 
 	kfp = file_findfile(NULL, "elf kernel");
 	if (kfp == NULL)
 		kfp = file_findfile(NULL, "elf64 kernel");
 	if (kfp == NULL)
 		panic("can't find kernel file");
 	kernend = 0;	/* fill it in later */
 	file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
 	file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);
 #if defined(LOADER_FDT_SUPPORT)
 	if (dtb_size)
 		file_addmetadata(kfp, MODINFOMD_DTBP, sizeof dtbp, &dtbp);
 	else
 		pager_output("WARNING! Trying to fire up the kernel, but no "
 		    "device tree blob found!\n");
 #endif
 	file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
 
 	bi_load_efi_data(kfp);
 
 	/* Figure out the size and location of the metadata. */
 	*modulep = addr;
 	size = bi_copymodules(0);
 	kernend = roundup(addr + size, PAGE_SIZE);
 	*kernendp = kernend;
 
 	/* patch MODINFOMD_KERNEND */
 	md = file_findmetadata(kfp, MODINFOMD_KERNEND);
 	bcopy(&kernend, md->md_data, sizeof kernend);
+
+#if defined(__arm__)
+	*modulep -= __elfN(relocation_offset);
+
+	/* Do relocation fixup on metadata of each module. */
+	for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
+		for (i = 0; i < sizeof mdt / sizeof mdt[0]; i++) {
+			md = file_findmetadata(xp, mdt[i]);
+			if (md) {
+				bcopy(md->md_data, &vaddr, sizeof vaddr);
+				vaddr -= __elfN(relocation_offset);
+				bcopy(&vaddr, md->md_data, sizeof vaddr);
+			}
+		}
+	}
+#endif
 
 	/* Copy module list and metadata. */
 	(void)bi_copymodules(addr);
 
 	return (0);
 }
Index: projects/ifnet/sys/boot/efi/loader/copy.c
===================================================================
--- projects/ifnet/sys/boot/efi/loader/copy.c	(revision 281172)
+++ projects/ifnet/sys/boot/efi/loader/copy.c	(revision 281173)
@@ -1,121 +1,133 @@
 /*-
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Benno Rice under sponsorship from
  * the FreeBSD Foundation.
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 
 #include <stand.h>
 #include <bootstrap.h>
 
 #include <efi.h>
 #include <efilib.h>
 
 #ifndef EFI_STAGING_SIZE
 #define	EFI_STAGING_SIZE	32
 #endif
 
 #define	STAGE_PAGES	((EFI_STAGING_SIZE) * 1024 * 1024 / 4096)
 
 EFI_PHYSICAL_ADDRESS	staging, staging_end;
 int			stage_offset_set = 0;
 ssize_t			stage_offset;
 
 int
 efi_copy_init(void)
 {
 	EFI_STATUS	status;
 
 	status = BS->AllocatePages(AllocateAnyPages, EfiLoaderData,
 	    STAGE_PAGES, &staging);
 	if (EFI_ERROR(status)) {
 		printf("failed to allocate staging area: %lu\n",
 		    (unsigned long)(status & EFI_ERROR_MASK));
 		return (status);
 	}
 	staging_end = staging + STAGE_PAGES * 4096;
 
+#ifdef __arm__
+	/* Round the kernel load address to a 2MiB value */
+	staging = roundup2(staging, 2 * 1024 * 1024);
+#endif
+
 	return (0);
+}
+
+void *
+efi_translate(vm_offset_t ptr)
+{
+
+	return ((void *)(ptr + stage_offset));
 }
 
 ssize_t
 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
 {
 
 	if (!stage_offset_set) {
 		stage_offset = (vm_offset_t)staging - dest;
 		stage_offset_set = 1;
 	}
 
 	/* XXX: Callers do not check for failure. */
 	if (dest + stage_offset + len > staging_end) {
 		errno = ENOMEM;
 		return (-1);
 	}
 	bcopy(src, (void *)(dest + stage_offset), len);
 	return (len);
 }
 
 ssize_t
 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
 {
 
 	/* XXX: Callers do not check for failure. */
 	if (src + stage_offset + len > staging_end) {
 		errno = ENOMEM;
 		return (-1);
 	}
 	bcopy((void *)(src + stage_offset), dest, len);
 	return (len);
 }
 
 
 ssize_t
 efi_readin(const int fd, vm_offset_t dest, const size_t len)
 {
 
 	if (dest + stage_offset + len > staging_end) {
 		errno = ENOMEM;
 		return (-1);
 	}
 	return (read(fd, (void *)(dest + stage_offset), len));
 }
 
 void
 efi_copy_finish(void)
 {
 	uint64_t	*src, *dst, *last;
 
 	src = (uint64_t *)staging;
 	dst = (uint64_t *)(staging - stage_offset);
 	last = (uint64_t *)(staging + STAGE_PAGES * EFI_PAGE_SIZE);
 
 	while (src < last)
 		*dst++ = *src++;
 }
Index: projects/ifnet/sys/boot
===================================================================
--- projects/ifnet/sys/boot	(revision 281172)
+++ projects/ifnet/sys/boot	(revision 281173)

Property changes on: projects/ifnet/sys/boot
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/boot:r281144-281172
Index: projects/ifnet/sys/dev/acpica/acpi.c
===================================================================
--- projects/ifnet/sys/dev/acpica/acpi.c	(revision 281172)
+++ projects/ifnet/sys/dev/acpica/acpi.c	(revision 281173)
@@ -1,4027 +1,4027 @@
 /*-
  * Copyright (c) 2000 Takanori Watanabe <takawata@jp.freebsd.org>
  * Copyright (c) 2000 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
  * Copyright (c) 2000, 2001 Michael Smith
  * Copyright (c) 2000 BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/reboot.h>
 #include <sys/sysctl.h>
 #include <sys/ctype.h>
 #include <sys/linker.h>
 #include <sys/power.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/timetc.h>
 
 #if defined(__i386__) || defined(__amd64__)
 #include <machine/pci_cfgreg.h>
 #endif
 #include <machine/resource.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <isa/isavar.h>
 #include <isa/pnpvar.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/accommon.h>
 #include <contrib/dev/acpica/include/acnamesp.h>
 
 #include <dev/acpica/acpivar.h>
 #include <dev/acpica/acpiio.h>
 
 #include <vm/vm_param.h>
 
 static MALLOC_DEFINE(M_ACPIDEV, "acpidev", "ACPI devices");
 
 /* Hooks for the ACPI CA debugging infrastructure */
 #define _COMPONENT	ACPI_BUS
 ACPI_MODULE_NAME("ACPI")
 
 static d_open_t		acpiopen;
 static d_close_t	acpiclose;
 static d_ioctl_t	acpiioctl;
 
 static struct cdevsw acpi_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_open =	acpiopen,
 	.d_close =	acpiclose,
 	.d_ioctl =	acpiioctl,
 	.d_name =	"acpi",
 };
 
 struct acpi_interface {
 	ACPI_STRING	*data;
 	int		num;
 };
 
 /* Global mutex for locking access to the ACPI subsystem. */
 struct mtx	acpi_mutex;
 struct callout	acpi_sleep_timer;
 
 /* Bitmap of device quirks. */
 int		acpi_quirks;
 
 /* Supported sleep states. */
 static BOOLEAN	acpi_sleep_states[ACPI_S_STATE_COUNT];
 
 static void	acpi_lookup(void *arg, const char *name, device_t *dev);
 static int	acpi_modevent(struct module *mod, int event, void *junk);
 static int	acpi_probe(device_t dev);
 static int	acpi_attach(device_t dev);
 static int	acpi_suspend(device_t dev);
 static int	acpi_resume(device_t dev);
 static int	acpi_shutdown(device_t dev);
 static device_t	acpi_add_child(device_t bus, u_int order, const char *name,
 			int unit);
 static int	acpi_print_child(device_t bus, device_t child);
 static void	acpi_probe_nomatch(device_t bus, device_t child);
 static void	acpi_driver_added(device_t dev, driver_t *driver);
 static int	acpi_read_ivar(device_t dev, device_t child, int index,
 			uintptr_t *result);
 static int	acpi_write_ivar(device_t dev, device_t child, int index,
 			uintptr_t value);
 static struct resource_list *acpi_get_rlist(device_t dev, device_t child);
 static void	acpi_reserve_resources(device_t dev);
 static int	acpi_sysres_alloc(device_t dev);
 static int	acpi_set_resource(device_t dev, device_t child, int type,
 			int rid, u_long start, u_long count);
 static struct resource *acpi_alloc_resource(device_t bus, device_t child,
 			int type, int *rid, u_long start, u_long end,
 			u_long count, u_int flags);
 static int	acpi_adjust_resource(device_t bus, device_t child, int type,
 			struct resource *r, u_long start, u_long end);
 static int	acpi_release_resource(device_t bus, device_t child, int type,
 			int rid, struct resource *r);
 static void	acpi_delete_resource(device_t bus, device_t child, int type,
 		    int rid);
 static uint32_t	acpi_isa_get_logicalid(device_t dev);
 static int	acpi_isa_get_compatid(device_t dev, uint32_t *cids, int count);
 static char	*acpi_device_id_probe(device_t bus, device_t dev, char **ids);
 static ACPI_STATUS acpi_device_eval_obj(device_t bus, device_t dev,
 		    ACPI_STRING pathname, ACPI_OBJECT_LIST *parameters,
 		    ACPI_BUFFER *ret);
 static ACPI_STATUS acpi_device_scan_cb(ACPI_HANDLE h, UINT32 level,
 		    void *context, void **retval);
 static ACPI_STATUS acpi_device_scan_children(device_t bus, device_t dev,
 		    int max_depth, acpi_scan_cb_t user_fn, void *arg);
 static int	acpi_set_powerstate(device_t child, int state);
 static int	acpi_isa_pnp_probe(device_t bus, device_t child,
 		    struct isa_pnp_id *ids);
 static void	acpi_probe_children(device_t bus);
 static void	acpi_probe_order(ACPI_HANDLE handle, int *order);
 static ACPI_STATUS acpi_probe_child(ACPI_HANDLE handle, UINT32 level,
 		    void *context, void **status);
 static void	acpi_sleep_enable(void *arg);
 static ACPI_STATUS acpi_sleep_disable(struct acpi_softc *sc);
 static ACPI_STATUS acpi_EnterSleepState(struct acpi_softc *sc, int state);
 static void	acpi_shutdown_final(void *arg, int howto);
 static void	acpi_enable_fixed_events(struct acpi_softc *sc);
 static BOOLEAN	acpi_has_hid(ACPI_HANDLE handle);
 static void	acpi_resync_clock(struct acpi_softc *sc);
 static int	acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate);
 static int	acpi_wake_run_prep(ACPI_HANDLE handle, int sstate);
 static int	acpi_wake_prep_walk(int sstate);
 static int	acpi_wake_sysctl_walk(device_t dev);
 static int	acpi_wake_set_sysctl(SYSCTL_HANDLER_ARGS);
 static void	acpi_system_eventhandler_sleep(void *arg, int state);
 static void	acpi_system_eventhandler_wakeup(void *arg, int state);
 static int	acpi_sname2sstate(const char *sname);
 static const char *acpi_sstate2sname(int sstate);
 static int	acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_debug_objects_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_pm_func(u_long cmd, void *arg, ...);
 static int	acpi_child_location_str_method(device_t acdev, device_t child,
 					       char *buf, size_t buflen);
 static int	acpi_child_pnpinfo_str_method(device_t acdev, device_t child,
 					      char *buf, size_t buflen);
 #if defined(__i386__) || defined(__amd64__)
 static void	acpi_enable_pcie(void);
 #endif
 static void	acpi_hint_device_unit(device_t acdev, device_t child,
 		    const char *name, int *unitp);
 static void	acpi_reset_interfaces(device_t dev);
 
 static device_method_t acpi_methods[] = {
     /* Device interface */
     DEVMETHOD(device_probe,		acpi_probe),
     DEVMETHOD(device_attach,		acpi_attach),
     DEVMETHOD(device_shutdown,		acpi_shutdown),
     DEVMETHOD(device_detach,		bus_generic_detach),
     DEVMETHOD(device_suspend,		acpi_suspend),
     DEVMETHOD(device_resume,		acpi_resume),
 
     /* Bus interface */
     DEVMETHOD(bus_add_child,		acpi_add_child),
     DEVMETHOD(bus_print_child,		acpi_print_child),
     DEVMETHOD(bus_probe_nomatch,	acpi_probe_nomatch),
     DEVMETHOD(bus_driver_added,		acpi_driver_added),
     DEVMETHOD(bus_read_ivar,		acpi_read_ivar),
     DEVMETHOD(bus_write_ivar,		acpi_write_ivar),
     DEVMETHOD(bus_get_resource_list,	acpi_get_rlist),
     DEVMETHOD(bus_set_resource,		acpi_set_resource),
     DEVMETHOD(bus_get_resource,		bus_generic_rl_get_resource),
     DEVMETHOD(bus_alloc_resource,	acpi_alloc_resource),
     DEVMETHOD(bus_adjust_resource,	acpi_adjust_resource),
     DEVMETHOD(bus_release_resource,	acpi_release_resource),
     DEVMETHOD(bus_delete_resource,	acpi_delete_resource),
     DEVMETHOD(bus_child_pnpinfo_str,	acpi_child_pnpinfo_str_method),
     DEVMETHOD(bus_child_location_str,	acpi_child_location_str_method),
     DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
     DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
     DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
     DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
     DEVMETHOD(bus_hint_device_unit,	acpi_hint_device_unit),
     DEVMETHOD(bus_get_domain,		acpi_get_domain),
 
     /* ACPI bus */
     DEVMETHOD(acpi_id_probe,		acpi_device_id_probe),
     DEVMETHOD(acpi_evaluate_object,	acpi_device_eval_obj),
     DEVMETHOD(acpi_pwr_for_sleep,	acpi_device_pwr_for_sleep),
     DEVMETHOD(acpi_scan_children,	acpi_device_scan_children),
 
     /* ISA emulation */
     DEVMETHOD(isa_pnp_probe,		acpi_isa_pnp_probe),
 
     DEVMETHOD_END
 };
 
 static driver_t acpi_driver = {
     "acpi",
     acpi_methods,
     sizeof(struct acpi_softc),
 };
 
 static devclass_t acpi_devclass;
 DRIVER_MODULE(acpi, nexus, acpi_driver, acpi_devclass, acpi_modevent, 0);
 MODULE_VERSION(acpi, 1);
 
 ACPI_SERIAL_DECL(acpi, "ACPI root bus");
 
 /* Local pools for managing system resources for ACPI child devices. */
 static struct rman acpi_rman_io, acpi_rman_mem;
 
 #define ACPI_MINIMUM_AWAKETIME	5
 
 /* Holds the description of the acpi0 device. */
 static char acpi_desc[ACPI_OEM_ID_SIZE + ACPI_OEM_TABLE_ID_SIZE + 2];
 
 SYSCTL_NODE(_debug, OID_AUTO, acpi, CTLFLAG_RD, NULL, "ACPI debugging");
 static char acpi_ca_version[12];
 SYSCTL_STRING(_debug_acpi, OID_AUTO, acpi_ca_version, CTLFLAG_RD,
 	      acpi_ca_version, 0, "Version of Intel ACPI-CA");
 
 /*
  * Allow overriding _OSI methods.
  */
 static char acpi_install_interface[256];
 TUNABLE_STR("hw.acpi.install_interface", acpi_install_interface,
     sizeof(acpi_install_interface));
 static char acpi_remove_interface[256];
 TUNABLE_STR("hw.acpi.remove_interface", acpi_remove_interface,
     sizeof(acpi_remove_interface));
 
 /* Allow users to dump Debug objects without ACPI debugger. */
 static int acpi_debug_objects;
 TUNABLE_INT("debug.acpi.enable_debug_objects", &acpi_debug_objects);
 SYSCTL_PROC(_debug_acpi, OID_AUTO, enable_debug_objects,
     CTLFLAG_RW | CTLTYPE_INT, NULL, 0, acpi_debug_objects_sysctl, "I",
     "Enable Debug objects");
 
 /* Allow the interpreter to ignore common mistakes in BIOS. */
 static int acpi_interpreter_slack = 1;
 TUNABLE_INT("debug.acpi.interpreter_slack", &acpi_interpreter_slack);
 SYSCTL_INT(_debug_acpi, OID_AUTO, interpreter_slack, CTLFLAG_RDTUN,
     &acpi_interpreter_slack, 1, "Turn on interpreter slack mode.");
 
 /* Ignore register widths set by FADT and use default widths instead. */
 static int acpi_ignore_reg_width = 1;
 TUNABLE_INT("debug.acpi.default_register_width", &acpi_ignore_reg_width);
 SYSCTL_INT(_debug_acpi, OID_AUTO, default_register_width, CTLFLAG_RDTUN,
     &acpi_ignore_reg_width, 1, "Ignore register widths set by FADT");
 
 #ifdef __amd64__
 /* Reset system clock while resuming.  XXX Remove once tested. */
 static int acpi_reset_clock = 1;
 TUNABLE_INT("debug.acpi.reset_clock", &acpi_reset_clock);
 SYSCTL_INT(_debug_acpi, OID_AUTO, reset_clock, CTLFLAG_RW,
     &acpi_reset_clock, 1, "Reset system clock while resuming.");
 #endif
 
 /* Allow users to override quirks. */
 TUNABLE_INT("debug.acpi.quirks", &acpi_quirks);
 
 static int acpi_susp_bounce;
 SYSCTL_INT(_debug_acpi, OID_AUTO, suspend_bounce, CTLFLAG_RW,
     &acpi_susp_bounce, 0, "Don't actually suspend, just test devices.");
 
 /*
  * Module event handler.
  *
  * ACPI can only be loaded as a module by the loader; activating it after
  * system bootstrap time is not useful, and can be fatal to the system.
  * It also cannot be unloaded, since the entire system bus hierarchy hangs
  * off it.
  *
  * Returns EPERM for a load once "cold" is clear, EBUSY for an unload once
  * "cold" is clear and ACPI is the registered PM type, and 0 otherwise.
  */
 static int
 acpi_modevent(struct module *mod, int event, void *junk)
 {
     if (event == MOD_LOAD) {
 	if (!cold) {
 	    printf("The ACPI driver cannot be loaded after boot.\n");
 	    return (EPERM);
 	}
     } else if (event == MOD_UNLOAD) {
 	if (!cold && power_pm_get_type() == POWER_PM_TYPE_ACPI)
 	    return (EBUSY);
     }
 
     /* Every other event, and every permitted load/unload, succeeds. */
     return (0);
 }
 
 /*
  * Perform early initialization: set up the ACPI root tables and work out
  * the quirk bits.  Only the first call does any work; later calls return
  * AE_OK.  Returns AE_SUPPORT when the quirks mark this system as broken.
  */
 ACPI_STATUS
 acpi_Startup(void)
 {
     static int startup_done = 0;
     ACPI_STATUS status;
     int hint;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* Only run the startup code once.  The MADT driver also calls this. */
     if (startup_done)
 	return_VALUE (AE_OK);
     startup_done = 1;
 
     /*
      * Pre-allocate space for RSDT/XSDT and DSDT tables and allow resizing
      * if more tables exist.
      */
     status = AcpiInitializeTables(NULL, 2, TRUE);
     if (ACPI_FAILURE(status)) {
 	printf("ACPI: Table initialisation failed: %s\n",
 	    AcpiFormatException(status));
 	return_VALUE (status);
     }
 
     /* Set up any quirks we have for this system. */
     if (acpi_quirks == ACPI_Q_OK)
 	acpi_table_quirks(&acpi_quirks);
 
     /* If the user manually set the disabled hint to 0, force-enable ACPI. */
     if (resource_int_value("acpi", 0, "disabled", &hint) == 0 && hint == 0)
 	acpi_quirks &= ~ACPI_Q_BROKEN;
 
     if ((acpi_quirks & ACPI_Q_BROKEN) != 0) {
 	printf("ACPI disabled by blacklist.  Contact your BIOS vendor.\n");
 	status = AE_SUPPORT;
     }
 
     return_VALUE (status);
 }
 
 /*
  * Detect ACPI and perform early initialisation.
  *
  * On success fills acpi_desc with the OEM id and OEM table id taken from
  * the root table header, fills acpi_ca_version, and returns 0.  Returns
  * ENXIO whenever ACPI cannot or should not be used.
  */
 int
 acpi_identify(void)
 {
     ACPI_TABLE_RSDP	*rsdp;
     ACPI_TABLE_HEADER	*rsdt;
     ACPI_PHYSICAL_ADDRESS paddr;
     struct sbuf		sb;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (!cold)
 	return (ENXIO);
 
     /* Check that we haven't been disabled with a hint. */
     if (resource_disabled("acpi", 0))
 	return (ENXIO);
 
     /* Check for other PM systems. */
     if (power_pm_get_type() != POWER_PM_TYPE_NONE &&
 	power_pm_get_type() != POWER_PM_TYPE_ACPI) {
 	printf("ACPI identify failed, other PM system enabled.\n");
 	return (ENXIO);
     }
 
     /* Initialize root tables. */
     if (ACPI_FAILURE(acpi_Startup())) {
 	printf("ACPI: Try disabling either ACPI or apic support.\n");
 	return (ENXIO);
     }
 
     /* Locate and map the RSDP just long enough to read the root table address. */
     paddr = AcpiOsGetRootPointer();
     if (paddr == 0)
 	return (ENXIO);
     rsdp = AcpiOsMapMemory(paddr, sizeof(ACPI_TABLE_RSDP));
     if (rsdp == NULL)
 	return (ENXIO);
 
     /* Use the XSDT address only for revision > 1 with a non-zero address. */
     if (rsdp->Revision > 1 && rsdp->XsdtPhysicalAddress != 0)
 	paddr = (ACPI_PHYSICAL_ADDRESS)rsdp->XsdtPhysicalAddress;
     else
 	paddr = (ACPI_PHYSICAL_ADDRESS)rsdp->RsdtPhysicalAddress;
     AcpiOsUnmapMemory(rsdp, sizeof(ACPI_TABLE_RSDP));
 
     rsdt = AcpiOsMapMemory(paddr, sizeof(ACPI_TABLE_HEADER));
     if (rsdt == NULL)
 	return (ENXIO);
 
     /* Build "<OemId> <OemTableId>" in the fixed-size acpi_desc buffer. */
     sbuf_new(&sb, acpi_desc, sizeof(acpi_desc), SBUF_FIXEDLEN);
     sbuf_bcat(&sb, rsdt->OemId, ACPI_OEM_ID_SIZE);
     sbuf_trim(&sb);
     sbuf_putc(&sb, ' ');
     sbuf_bcat(&sb, rsdt->OemTableId, ACPI_OEM_TABLE_ID_SIZE);
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     sbuf_delete(&sb);
     AcpiOsUnmapMemory(rsdt, sizeof(ACPI_TABLE_HEADER));
 
     snprintf(acpi_ca_version, sizeof(acpi_ca_version), "%x", ACPI_CA_VERSION);
 
     return (0);
 }
 
 /*
  * Fetch some descriptive data from ACPI to put in our attach message.
  *
  * Sets the device description to acpi_desc (the buffer acpi_identify()
  * fills in) and returns BUS_PROBE_NOWILDCARD.
  */
 static int
 acpi_probe(device_t dev)
 {
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* acpi_desc is a static buffer, so it is passed without copying. */
     device_set_desc(dev, acpi_desc);
 
     return_VALUE (BUS_PROBE_NOWILDCARD);
 }
 
 /*
  * Attach the ACPI root bus device.
  *
  * Initialises the ACPI resource managers and global mutex, brings up the
  * ACPI-CA subsystem (tables, hardware, objects), creates the hw.<name>
  * sysctl tree, selects default sleep states, registers event handlers and
  * the control device, probes children (unless "bus" is disabled) and arms
  * the timer that later allows sleep requests.
  *
  * Returns 0 on success.  If any ACPI-CA bring-up step fails the function
  * returns ENXIO; if acpi_machdep_init() fails its error is returned.
  */
 static int
 acpi_attach(device_t dev)
 {
     struct acpi_softc	*sc;
     ACPI_STATUS		status;
     int			error, state;
     UINT32		flags;
     UINT8		TypeA, TypeB;
     char		*env;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = device_get_softc(dev);
     sc->acpi_dev = dev;
     callout_init(&sc->susp_force_to, TRUE);
 
     /* Default result for every "goto out" below until success is recorded. */
     error = ENXIO;
 
     /* Initialize resource manager. */
     acpi_rman_io.rm_type = RMAN_ARRAY;
     acpi_rman_io.rm_start = 0;
     acpi_rman_io.rm_end = 0xffff;
     acpi_rman_io.rm_descr = "ACPI I/O ports";
     if (rman_init(&acpi_rman_io) != 0)
 	panic("acpi rman_init IO ports failed");
     acpi_rman_mem.rm_type = RMAN_ARRAY;
     acpi_rman_mem.rm_start = 0;
     acpi_rman_mem.rm_end = ~0ul;
     acpi_rman_mem.rm_descr = "ACPI I/O memory addresses";
     if (rman_init(&acpi_rman_mem) != 0)
 	panic("acpi rman_init memory failed");
 
     /* Initialise the ACPI mutex */
     mtx_init(&acpi_mutex, "ACPI global lock", NULL, MTX_DEF);
 
     /*
      * Set the globals from our tunables.  This is needed because ACPI-CA
      * uses UINT8 for some values and we have no tunable_byte.
      */
     AcpiGbl_EnableInterpreterSlack = acpi_interpreter_slack ? TRUE : FALSE;
     AcpiGbl_EnableAmlDebugObject = acpi_debug_objects ? TRUE : FALSE;
     AcpiGbl_UseDefaultRegisterWidths = acpi_ignore_reg_width ? TRUE : FALSE;
 
 #ifndef ACPI_DEBUG
     /*
      * Disable all debugging layers and levels.
      */
     AcpiDbgLayer = 0;
     AcpiDbgLevel = 0;
 #endif
 
     /* Start up the ACPI CA subsystem. */
     status = AcpiInitializeSubsystem();
     if (ACPI_FAILURE(status)) {
 	device_printf(dev, "Could not initialize Subsystem: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /* Override OS interfaces if the user requested. */
     acpi_reset_interfaces(dev);
 
     /* Load ACPI name space. */
     status = AcpiLoadTables();
     if (ACPI_FAILURE(status)) {
 	device_printf(dev, "Could not load Namespace: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
 #if defined(__i386__) || defined(__amd64__)
     /* Handle MCFG table if present. */
     acpi_enable_pcie();
 #endif
 
     /*
      * Note that some systems (specifically, those with namespace evaluation
      * issues that require the avoidance of parts of the namespace) must
      * avoid running _INI and _STA on everything, as well as dodging the final
      * object init pass.
      *
      * For these devices, we set ACPI_NO_DEVICE_INIT and ACPI_NO_OBJECT_INIT.
      *
      * XXX We should arrange for the object init pass after we have attached
      *     all our child devices, but on many systems it works here.
      */
     flags = 0;
     if (testenv("debug.acpi.avoid"))
 	flags = ACPI_NO_DEVICE_INIT | ACPI_NO_OBJECT_INIT;
 
     /* Bring the hardware and basic handlers online. */
     if (ACPI_FAILURE(status = AcpiEnableSubsystem(flags))) {
 	device_printf(dev, "Could not enable ACPI: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /*
      * Call the ECDT probe function to provide EC functionality before
      * the namespace has been evaluated.
      *
      * XXX This happens before the sysresource devices have been probed and
      * attached so its resources come from nexus0.  In practice, this isn't
      * a problem but should be addressed eventually.
      */
     acpi_ec_ecdt_probe(dev);
 
     /* Bring device objects and regions online. */
     if (ACPI_FAILURE(status = AcpiInitializeObjects(flags))) {
 	device_printf(dev, "Could not initialize ACPI objects: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /*
      * Setup our sysctl tree.
      *
      * XXX: This doesn't check to make sure that none of these fail.
      */
     sysctl_ctx_init(&sc->acpi_sysctl_ctx);
     sc->acpi_sysctl_tree = SYSCTL_ADD_NODE(&sc->acpi_sysctl_ctx,
 			       SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
 			       device_get_name(dev), CTLFLAG_RD, 0, "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "supported_sleep_state", CTLTYPE_STRING | CTLFLAG_RD,
 	0, 0, acpi_supported_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "power_button_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_power_button_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "sleep_button_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_sleep_button_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "lid_switch_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_lid_switch_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "standby_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_standby_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "suspend_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_suspend_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "sleep_delay", CTLFLAG_RW, &sc->acpi_sleep_delay, 0,
 	"sleep delay in seconds");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "s4bios", CTLFLAG_RW, &sc->acpi_s4bios, 0, "S4BIOS mode");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "verbose", CTLFLAG_RW, &sc->acpi_verbose, 0, "verbose mode");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "disable_on_reboot", CTLFLAG_RW,
 	&sc->acpi_do_disable, 0, "Disable ACPI when rebooting/halting system");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "handle_reboot", CTLFLAG_RW,
 	&sc->acpi_handle_reboot, 0, "Use ACPI Reset Register to reboot");
 
     /*
      * Default to 1 second before sleeping to give some machines time to
      * stabilize.
      */
     sc->acpi_sleep_delay = 1;
     if (bootverbose)
 	sc->acpi_verbose = 1;
     if ((env = kern_getenv("hw.acpi.verbose")) != NULL) {
 	if (strcmp(env, "0") != 0)
 	    sc->acpi_verbose = 1;
 	freeenv(env);
     }
 
     /* Only enable reboot by default if the FADT says it is available. */
     if (AcpiGbl_FADT.Flags & ACPI_FADT_RESET_REGISTER)
 	sc->acpi_handle_reboot = 1;
 
     /*
      * Only enable S4BIOS by default if the FACS says it is available.
      * AcpiGbl_FACS is a pointer and may be NULL when the platform provides
      * no FACS, so it must be checked before it is dereferenced.
      */
     if (AcpiGbl_FACS != NULL &&
 	(AcpiGbl_FACS->Flags & ACPI_FACS_S4_BIOS_PRESENT) != 0)
 	sc->acpi_s4bios = 1;
 
     /* Probe all supported sleep states. */
     acpi_sleep_states[ACPI_STATE_S0] = TRUE;
     for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
 	if (ACPI_SUCCESS(AcpiEvaluateObject(ACPI_ROOT_OBJECT,
 	    __DECONST(char *, AcpiGbl_SleepStateNames[state]), NULL, NULL)) &&
 	    ACPI_SUCCESS(AcpiGetSleepTypeData(state, &TypeA, &TypeB)))
 	    acpi_sleep_states[state] = TRUE;
 
     /*
      * Dispatch the default sleep state to devices.  The lid switch is set
      * to UNKNOWN by default to avoid surprising users.
      */
     sc->acpi_power_button_sx = acpi_sleep_states[ACPI_STATE_S5] ?
 	ACPI_STATE_S5 : ACPI_STATE_UNKNOWN;
     sc->acpi_lid_switch_sx = ACPI_STATE_UNKNOWN;
     sc->acpi_standby_sx = acpi_sleep_states[ACPI_STATE_S1] ?
 	ACPI_STATE_S1 : ACPI_STATE_UNKNOWN;
     sc->acpi_suspend_sx = acpi_sleep_states[ACPI_STATE_S3] ?
 	ACPI_STATE_S3 : ACPI_STATE_UNKNOWN;
 
     /* Pick the first valid sleep state for the sleep button default. */
     sc->acpi_sleep_button_sx = ACPI_STATE_UNKNOWN;
     for (state = ACPI_STATE_S1; state <= ACPI_STATE_S4; state++)
 	if (acpi_sleep_states[state]) {
 	    sc->acpi_sleep_button_sx = state;
 	    break;
 	}
 
     acpi_enable_fixed_events(sc);
 
     /*
      * Scan the namespace and attach/initialise children.
      */
 
     /* Register our shutdown handler. */
     EVENTHANDLER_REGISTER(shutdown_final, acpi_shutdown_final, sc,
 	SHUTDOWN_PRI_LAST);
 
     /*
      * Register our acpi event handlers.
      * XXX should be configurable eg. via userland policy manager.
      */
     EVENTHANDLER_REGISTER(acpi_sleep_event, acpi_system_eventhandler_sleep,
 	sc, ACPI_EVENT_PRI_LAST);
     EVENTHANDLER_REGISTER(acpi_wakeup_event, acpi_system_eventhandler_wakeup,
 	sc, ACPI_EVENT_PRI_LAST);
 
     /* Flag our initial states. */
     sc->acpi_enabled = TRUE;
     sc->acpi_sstate = ACPI_STATE_S0;
     sc->acpi_sleep_disabled = TRUE;
 
     /* Create the control device */
     sc->acpi_dev_t = make_dev(&acpi_cdevsw, 0, UID_ROOT, GID_WHEEL, 0644,
 			      "acpi");
     sc->acpi_dev_t->si_drv1 = sc;
 
     if ((error = acpi_machdep_init(dev)))
 	goto out;
 
     /* Register ACPI again to pass the correct argument of pm_func. */
     power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, sc);
 
     if (!acpi_disabled("bus")) {
 	EVENTHANDLER_REGISTER(dev_lookup, acpi_lookup, NULL, 1000);
 	acpi_probe_children(dev);
     }
 
     /* Update all GPEs and enable runtime GPEs. */
     status = AcpiUpdateAllGpes();
     if (ACPI_FAILURE(status))
 	device_printf(dev, "Could not update all GPEs: %s\n",
 	    AcpiFormatException(status));
 
     /* Allow sleep request after a while. */
     callout_init_mtx(&acpi_sleep_timer, &acpi_mutex, 0);
     callout_reset(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME,
 	acpi_sleep_enable, sc);
 
     error = 0;
 
  out:
     return_VALUE (error);
 }
 
 /*
  * Ask every attached child of dev to enter the D-state appropriate for
  * "state".  Does nothing if the child list cannot be obtained.
  */
 static void
 acpi_set_power_children(device_t dev, int state)
 {
 	device_t *children;
 	device_t kid;
 	int count, idx, target;
 
 	if (device_get_children(dev, &children, &count) != 0)
 		return;
 
 	/*
 	 * Retrieve and set D-state for the sleep state if _SxD is present.
 	 * Skip children who aren't attached since they are handled separately.
 	 */
 	for (idx = 0; idx < count; idx++) {
 		kid = children[idx];
 		if (!device_is_attached(kid))
 			continue;
 		/* Start from the requested state; the lookup may adjust it. */
 		target = state;
 		if (acpi_device_pwr_for_sleep(dev, kid, &target) != 0)
 			continue;
 		acpi_set_powerstate(kid, target);
 	}
 	free(children, M_TEMP);
 }
 
 /*
  * Suspend method: suspend the children generically and, only if that
  * succeeds, move attached children to D3.  Returns the result of
  * bus_generic_suspend().
  */
 static int
 acpi_suspend(device_t dev)
 {
     int rv;
 
     GIANT_REQUIRED;
 
     rv = bus_generic_suspend(dev);
     if (rv != 0)
 	return (rv);
 
     acpi_set_power_children(dev, ACPI_STATE_D3);
     return (rv);
 }
 
 /*
  * Resume method: move attached children back to D0 first, then resume
  * them generically.  Returns the result of bus_generic_resume().
  */
 static int
 acpi_resume(device_t dev)
 {
     int rv;
 
     GIANT_REQUIRED;
 
     acpi_set_power_children(dev, ACPI_STATE_D0);
     rv = bus_generic_resume(dev);
 
     return (rv);
 }
 
 /*
  * Shutdown method: shut the children down, then prepare wake GPEs for S5.
  * Always returns 0; the results of both calls are deliberately ignored.
  */
 static int
 acpi_shutdown(device_t dev)
 {
 
     GIANT_REQUIRED;
 
     /* Allow children to shutdown first. */
     (void)bus_generic_shutdown(dev);
 
     /*
      * Enable any GPEs that are able to power-on the system (i.e., RTC).
      * Also, disable any that are not valid for this state (most).
      */
     (void)acpi_wake_prep_walk(ACPI_STATE_S5);
 
     return (0);
 }
 
 /*
  * Handle a new device being added: allocate zeroed per-device ACPI ivars,
  * create the child and attach the ivars to it.  Returns the new child, or
  * NULL if either the allocation or the child creation fails.
  */
 static device_t
 acpi_add_child(device_t bus, u_int order, const char *name, int unit)
 {
     struct acpi_device	*ivars;
     device_t		newdev;
 
     ivars = malloc(sizeof(*ivars), M_ACPIDEV, M_NOWAIT | M_ZERO);
     if (ivars == NULL)
 	return (NULL);
 
     resource_list_init(&ivars->ad_rl);
 
     newdev = device_add_child_ordered(bus, order, name, unit);
     if (newdev == NULL) {
 	/* No child to own the ivars, so release them again. */
 	free(ivars, M_ACPIDEV);
 	return (NULL);
     }
 
     device_set_ivars(newdev, ivars);
     return (newdev);
 }
 
 /*
  * Print the one-line description of a child: header, its port/iomem/irq/drq
  * resources, non-zero flags, domain and footer.  Returns the sum of the
  * values returned by the individual print calls.
  */
 static int
 acpi_print_child(device_t bus, device_t child)
 {
     struct acpi_device	 *ivars;
     struct resource_list *res;
     int total;
 
     ivars = device_get_ivars(child);
     res = &ivars->ad_rl;
 
     total = bus_print_child_header(bus, child);
     total += resource_list_print_type(res, "port",  SYS_RES_IOPORT, "%#lx");
     total += resource_list_print_type(res, "iomem", SYS_RES_MEMORY, "%#lx");
     total += resource_list_print_type(res, "irq",   SYS_RES_IRQ,    "%ld");
     total += resource_list_print_type(res, "drq",   SYS_RES_DRQ,    "%ld");
     if (device_get_flags(child) != 0)
 	total += printf(" flags %#x", device_get_flags(child));
     total += bus_print_child_domain(bus, child);
     total += bus_print_child_footer(bus, child);
 
     return (total);
 }
 
 /*
  * If this device is an ACPI child but no one claimed it, attempt
  * to power it off.  We'll power it back up when a driver is added.
  *
  * XXX Disabled for now since many necessary devices (like fdc and
  * ATA) don't claim the devices we created for them but still expect
  * them to be powered up.
  *
  * Without ACPI_ENABLE_POWERDOWN_NODRIVER defined this function is empty.
  */
 static void
 acpi_probe_nomatch(device_t bus, device_t child)
 {
 #ifdef ACPI_ENABLE_POWERDOWN_NODRIVER
     /* Unclaimed child: request D3. */
     acpi_set_powerstate(child, ACPI_STATE_D3);
 #endif
 }
 
 /*
  * A new driver was added: let it identify devices, then give every child
  * that is still DS_NOTPRESENT a chance to probe and attach.
  *
  * If a new driver has a chance to probe a child, first power it up.
  *
  * XXX Disabled for now (see acpi_probe_nomatch for details).
  */
 static void
 acpi_driver_added(device_t dev, driver_t *driver)
 {
     device_t *children, kid;
     int count, idx;
 
     DEVICE_IDENTIFY(driver, dev);
     if (device_get_children(dev, &children, &count) != 0)
 	return;
 
     for (idx = 0; idx < count; idx++) {
 	kid = children[idx];
 	if (device_get_state(kid) != DS_NOTPRESENT)
 	    continue;
 #ifdef ACPI_ENABLE_POWERDOWN_NODRIVER
 	/* Power up for the probe; power back down if nothing attaches. */
 	acpi_set_powerstate(kid, ACPI_STATE_D0);
 	if (device_probe_and_attach(kid) != 0)
 	    acpi_set_powerstate(kid, ACPI_STATE_D3);
 #else
 	device_probe_and_attach(kid);
 #endif
     }
     free(children, M_TEMP);
 }
 
 /*
  * Location hint for devctl(8).
  *
  * Writes "handle=<name>" into buf, followed by " _PXM=<n>" when _PXM
  * evaluates successfully, or "unknown" when the child has no ACPI handle.
  * Always returns 0.
  */
 static int
 acpi_child_location_str_method(device_t cbdev, device_t child, char *buf,
     size_t buflen)
 {
     struct acpi_device *ivars;
     char pxmstr[32];
     int pxm;
 
     ivars = device_get_ivars(child);
     if (ivars->ad_handle == NULL) {
 	snprintf(buf, buflen, "unknown");
 	return (0);
     }
 
     snprintf(buf, buflen, "handle=%s", acpi_name(ivars->ad_handle));
     if (ACPI_SUCCESS(acpi_GetInteger(ivars->ad_handle, "_PXM", &pxm))) {
 	snprintf(pxmstr, sizeof(pxmstr), " _PXM=%d", pxm);
 	strlcat(buf, pxmstr, buflen);
     }
     return (0);
 }
 
 /*
  * PnP information for devctl(8).
  *
  * Writes "_HID=<id> _UID=<n>" into buf, using "none" and 0 for fields the
  * object info does not mark valid, or "unknown" when the object info cannot
  * be fetched.  Always returns 0.
  */
 static int
 acpi_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf,
     size_t buflen)
 {
     struct acpi_device *ivars;
     ACPI_DEVICE_INFO *info;
     const char *hid;
     unsigned long uid;
 
     ivars = device_get_ivars(child);
     if (ACPI_FAILURE(AcpiGetObjectInfo(ivars->ad_handle, &info))) {
 	snprintf(buf, buflen, "unknown");
 	return (0);
     }
 
     if (info->Valid & ACPI_VALID_HID)
 	hid = info->HardwareId.String;
     else
 	hid = "none";
 
     /* The _UID string is parsed as a decimal number. */
     if (info->Valid & ACPI_VALID_UID)
 	uid = strtoul(info->UniqueId.String, NULL, 10);
     else
 	uid = 0UL;
 
     snprintf(buf, buflen, "_HID=%s _UID=%lu", hid, uid);
     AcpiOsFree(info);
 
     return (0);
 }
 
 /*
  * Handle per-device ivars
  */
 /*
  * Read one instance variable of child into *result.  Returns 0 on success
  * and ENOENT when the child has no ivars or the index is not handled here.
  */
 static int
 acpi_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
 {
     struct acpi_device	*ivars;
 
     ivars = device_get_ivars(child);
     if (ivars == NULL) {
 	device_printf(child, "device has no ivars\n");
 	return (ENOENT);
     }
 
     /* ACPI and ISA compatibility ivars */
     switch (index) {
     case ACPI_IVAR_HANDLE:
 	*(ACPI_HANDLE *)result = ivars->ad_handle;
 	return (0);
     case ACPI_IVAR_PRIVATE:
 	*(void **)result = ivars->ad_private;
 	return (0);
     case ACPI_IVAR_FLAGS:
 	*(int *)result = ivars->ad_flags;
 	return (0);
     case ISA_IVAR_VENDORID:
     case ISA_IVAR_SERIAL:
     case ISA_IVAR_COMPATID:
 	/* These ISA ids are always reported as -1. */
 	*(int *)result = -1;
 	return (0);
     case ISA_IVAR_LOGICALID:
 	*(int *)result = acpi_isa_get_logicalid(child);
 	return (0);
     default:
 	return (ENOENT);
     }
 }
 
 /*
  * Store value into one instance variable of child.  Returns 0 on success
  * and ENOENT when the child has no ivars; an index that is not handled
  * here triggers a panic.
  */
 static int
 acpi_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
 {
     struct acpi_device	*ivars;
 
     ivars = device_get_ivars(child);
     if (ivars == NULL) {
 	device_printf(child, "device has no ivars\n");
 	return (ENOENT);
     }
 
     switch (index) {
     case ACPI_IVAR_HANDLE:
 	ivars->ad_handle = (ACPI_HANDLE)value;
 	return (0);
     case ACPI_IVAR_PRIVATE:
 	ivars->ad_private = (void *)value;
 	return (0);
     case ACPI_IVAR_FLAGS:
 	ivars->ad_flags = (int)value;
 	return (0);
     default:
 	panic("bad ivar write request (%d)", index);
 	return (ENOENT);
     }
 }
 
 /*
  * Child resource allocation/removal support.
  *
  * Return the resource list embedded in the child's ACPI ivars.
  */
 static struct resource_list *
 acpi_get_rlist(device_t dev, device_t child)
 {
     struct acpi_device *ivars = device_get_ivars(child);
 
     return (&ivars->ad_rl);
 }
 
 /*
  * Return 1 if 'value' falls inside the [start, end] range of any entry of
  * the given resource type in the device's resource list, 0 otherwise.
  */
 static int
 acpi_match_resource_hint(device_t dev, int type, long value)
 {
     struct acpi_device *ivars;
     struct resource_list_entry *entry;
 
     ivars = device_get_ivars(dev);
     STAILQ_FOREACH(entry, &ivars->ad_rl, link) {
 	if (entry->type == type && entry->start <= value &&
 	    entry->end >= value)
 	    return (1);
     }
     return (0);
 }
 
 /*
  * Wire device unit numbers based on resource matches in hints.
  *
  * Walks every hint entry for devices called 'name' that is attached "at"
  * acpi/acpi0/isa/isa0 and compares the hinted port, maddr, irq and drq
  * values against the resource list of 'child'.  On the first hint that
  * matches, the hinted unit number is stored in *unitp.  If no hint
  * matches, *unitp is left untouched.
  */
 static void
 acpi_hint_device_unit(device_t acdev, device_t child, const char *name,
     int *unitp)
 {
     const char *s;
     long value;
     int line, matches, unit;
 
     /*
      * Iterate over all the hints for the devices with the specified
      * name to see if one's resources are a subset of this device.
      */
     line = 0;
     for (;;) {
 	/* 'line' is the iteration cursor; non-zero means no more hints. */
 	if (resource_find_dev(&line, name, &unit, "at", NULL) != 0)
 	    break;
 
 	/* Must have an "at" for acpi or isa. */
 	resource_string_value(name, unit, "at", &s);
 	if (!(strcmp(s, "acpi0") == 0 || strcmp(s, "acpi") == 0 ||
 	    strcmp(s, "isa0") == 0 || strcmp(s, "isa") == 0))
 	    continue;
 
 	/*
 	 * Check for matching resources.  We must have at least one match.
 	 * Since I/O and memory resources cannot be shared, if we get a
 	 * match on either of those, ignore any mismatches in IRQs or DRQs.
 	 *
 	 * XXX: We may want to revisit this to be more lenient and wire
 	 * as long as it gets one match.
 	 */
 	matches = 0;
 	if (resource_long_value(name, unit, "port", &value) == 0) {
 	    /*
 	     * Floppy drive controllers are notorious for having a
 	     * wide variety of resources not all of which include the
 	     * first port that is specified by the hint (typically
 	     * 0x3f0) (see the comment above fdc_isa_alloc_resources()
 	     * in fdc_isa.c).  However, they do all seem to include
 	     * port + 2 (e.g. 0x3f2) so for a floppy device, look for
 	     * 'value + 2' in the port resources instead of the hint
 	     * value.
 	     */
 	    if (strcmp(name, "fdc") == 0)
 		value += 2;
 	    if (acpi_match_resource_hint(child, SYS_RES_IOPORT, value))
 		matches++;
 	    else
 		continue;
 	}
 	if (resource_long_value(name, unit, "maddr", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_MEMORY, value))
 		matches++;
 	    else
 		continue;
 	}
 	/* A port or memory match is decisive; skip the IRQ/DRQ checks. */
 	if (matches > 0)
 	    goto matched;
 	if (resource_long_value(name, unit, "irq", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_IRQ, value))
 		matches++;
 	    else
 		continue;
 	}
 	if (resource_long_value(name, unit, "drq", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_DRQ, value))
 		matches++;
 	    else
 		continue;
 	}
 
     matched:
 	if (matches > 0) {
 	    /* We have a winner! */
 	    *unitp = unit;
 	    break;
 	}
     }
 }
 
 /*
  * Look up the NUMA domain of a child device.
  *
  * When the kernel supports more than one memory domain and the child has
  * a _PXM object, the proximity value is translated to a VM domain id and
  * stored in *domain (ENOENT if the translation fails).  In every other
  * case the request is passed to bus_generic_get_domain().
  */
 int
 acpi_get_domain(device_t dev, device_t child, int *domain)
 {
 #if MAXMEMDOM > 1
 	ACPI_HANDLE handle;
 	int pxm, vm_domain;
 
 	handle = acpi_get_handle(child);
 	if (handle != NULL &&
 	    ACPI_SUCCESS(acpi_GetInteger(handle, "_PXM", &pxm))) {
 		vm_domain = acpi_map_pxm_to_vm_domainid(pxm);
 		if (vm_domain >= 0) {
 			*domain = vm_domain;
 			return (0);
 		}
 		return (ENOENT);
 	}
 #endif
 	/* No _PXM on this node; go up a level. */
 	return (bus_generic_get_domain(dev, child, domain));
 }
 
 /*
  * Pre-allocate/manage all memory and IO resources.  Since rman can't handle
  * duplicates, we merge any in the sysresource attach routine.
  *
  * Returns ENXIO if the list of children cannot be obtained, 0 otherwise.
  * Failure to reserve an individual range is reported but is not treated
  * as an error.
  */
 static int
 acpi_sysres_alloc(device_t dev)
 {
     struct resource *res;
     struct resource_list *rl;
     struct resource_list_entry *rle;
     struct rman *rm;
     /* PnP IDs probed below to find the system resource devices. */
     char *sysres_ids[] = { "PNP0C01", "PNP0C02", NULL };
     device_t *children;
     int child_count, i;
 
     /*
      * Probe/attach any sysresource devices.  This would be unnecessary if we
      * had multi-pass probe/attach.
      */
     if (device_get_children(dev, &children, &child_count) != 0)
 	return (ENXIO);
     for (i = 0; i < child_count; i++) {
 	if (ACPI_ID_PROBE(dev, children[i], sysres_ids) != NULL)
 	    device_probe_and_attach(children[i]);
     }
     free(children, M_TEMP);
 
     /* Walk our own resource list as seen by our parent bus. */
     rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev);
     STAILQ_FOREACH(rle, rl, link) {
 	/* An entry that already has a resource attached is skipped. */
 	if (rle->res != NULL) {
 	    device_printf(dev, "duplicate resource for %lx\n", rle->start);
 	    continue;
 	}
 
 	/* Only memory and IO resources are valid here. */
 	switch (rle->type) {
 	case SYS_RES_IOPORT:
 	    rm = &acpi_rman_io;
 	    break;
 	case SYS_RES_MEMORY:
 	    rm = &acpi_rman_mem;
 	    break;
 	default:
 	    continue;
 	}
 
 	/* Pre-allocate resource and add to our rman pool. */
 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), dev, rle->type,
 	    &rle->rid, rle->start, rle->start + rle->count - 1, rle->count, 0);
 	if (res != NULL) {
 	    rman_manage_region(rm, rman_get_start(res), rman_get_end(res));
 	    rle->res = res;
-	} else
+	} else if (bootverbose)
 	    device_printf(dev, "reservation of %lx, %lx (%d) failed\n",
 		rle->start, rle->count, rle->type);
     }
     return (0);
 }
 
 /* NULL-terminated PnP ID list matching PCI link devices. */
 static char *pcilink_ids[] = { "PNP0C0F", NULL };
 /* NULL-terminated PnP ID list matching system resource devices. */
 static char *sysres_ids[] = { "PNP0C01", "PNP0C02", NULL };
 
 /*
  * Reserve the resources declared by each child found during attach.  Once
  * every child has been visited, the softc is flagged so that later
  * acpi_set_resource() calls reserve on the spot.
  */
 static void
 acpi_reserve_resources(device_t dev)
 {
     struct resource_list_entry *entry;
     struct resource_list *list;
     struct acpi_device *ivars;
     struct acpi_softc *softc;
     device_t *kids;
     device_t kid;
     int idx, nkids;
 
     softc = device_get_softc(dev);
     if (device_get_children(dev, &kids, &nkids) != 0)
 	return;
 
     for (idx = 0; idx < nkids; idx++) {
 	kid = kids[idx];
 	ivars = device_get_ivars(kid);
 	list = &ivars->ad_rl;
 
 	/* System resource devices are never reserved here. */
 	if (ACPI_ID_PROBE(dev, kid, sysres_ids) != NULL)
 	    continue;
 
 	STAILQ_FOREACH(entry, list, link) {
 	    /*
 	     * Two kinds of entries are skipped:
 	     *
 	     * - IRQs.  There are many sticky things to get right
 	     *   otherwise (e.g. IRQs for psm, atkbd, and HPET when
 	     *   using legacy routing).
 	     * - Entries that are already allocated.  The acpi_ec(4)
 	     *   driver can allocate its resources early if ECDT is
 	     *   present.
 	     */
 	    if (entry->type == SYS_RES_IRQ || entry->res != NULL)
 		continue;
 
 	    /*
 	     * Ask our parent for the range.  A failure because the
 	     * range is a system resource is deliberately ignored: the
 	     * range is already reserved, so no other device will use
 	     * it, and acpi_alloc_resource() will sub-allocate from the
 	     * system resource if the driver later needs it.
 	     */
 	    resource_list_reserve(list, dev, kid, entry->type, &entry->rid,
 		entry->start, entry->end, entry->count, 0);
 	}
     }
     free(kids, M_TEMP);
     softc->acpi_resources_reserved = 1;
 }
 
 /*
  * Record a resource range for a child and, once system resources have been
  * allocated, reserve it as well.
  *
  * Returns EBUSY if the resource identified by (type, rid) is currently
  * allocated; otherwise returns 0, including for the cases where the
  * request is deliberately ignored (IRQs of PCI link devices, most
  * memory/I/O ranges of PCI root bridges).
  */
 static int
 acpi_set_resource(device_t dev, device_t child, int type, int rid,
     u_long start, u_long count)
 {
     struct acpi_softc *sc = device_get_softc(dev);
     struct acpi_device *ad = device_get_ivars(child);
     struct resource_list *rl = &ad->ad_rl;
     ACPI_DEVICE_INFO *devinfo;
     u_long end;
     
     /* Ignore IRQ resources for PCI link devices. */
     if (type == SYS_RES_IRQ && ACPI_ID_PROBE(dev, child, pcilink_ids) != NULL)
 	return (0);
 
     /*
      * Ignore most resources for PCI root bridges.  Some BIOSes
      * incorrectly enumerate the memory ranges they decode as plain
      * memory resources instead of as ResourceProducer ranges.  Other
      * BIOSes incorrectly list system resource entries for I/O ranges
      * under the PCI bridge.  Do allow the one known-correct case on
      * x86 of a PCI bridge claiming the I/O ports used for PCI config
      * access.
      */
     if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
 	if (ACPI_SUCCESS(AcpiGetObjectInfo(ad->ad_handle, &devinfo))) {
 	    if ((devinfo->Flags & ACPI_PCI_ROOT_BRIDGE) != 0) {
 		/*
 		 * On non-x86 the guard below compiles away and the block
 		 * that follows runs unconditionally for root bridges.
 		 */
 #if defined(__i386__) || defined(__amd64__)
 		if (!(type == SYS_RES_IOPORT && start == CONF1_ADDR_PORT))
 #endif
 		{
 		    AcpiOsFree(devinfo);
 		    return (0);
 		}
 	    }
 	    /* Not ignored: release the info buffer and carry on. */
 	    AcpiOsFree(devinfo);
 	}
     }
 
     /* If the resource is already allocated, fail. */
     if (resource_list_busy(rl, type, rid))
 	return (EBUSY);
 
     /* If the resource is already reserved, release it. */
     if (resource_list_reserved(rl, type, rid))
 	resource_list_unreserve(rl, dev, child, type, rid);
 
     /* Add the resource. */
     end = (start + count - 1);
     resource_list_add(rl, type, rid, start, end, count);
 
     /* Don't reserve resources until the system resources are allocated. */
     if (!sc->acpi_resources_reserved)
 	return (0);
 
     /* Don't reserve system resources. */
     if (ACPI_ID_PROBE(dev, child, sysres_ids) != NULL)
 	return (0);
 
     /*
      * Don't reserve IRQ resources.  There are many sticky things to
      * get right otherwise (e.g. IRQs for psm, atkbd, and HPET when
      * using legacy routing).
      */
     if (type == SYS_RES_IRQ)
 	return (0);
 
     /*
      * Reserve the resource.
      *
      * XXX: Ignores failure for now.  Failure here is probably a
      * BIOS/firmware bug?
      */
     resource_list_reserve(rl, dev, child, type, &rid, start, end, count, 0);
     return (0);
 }
 
 /*
  * Allocate a resource on behalf of a child.
  *
  * Direct children are served from their resource list; requests from
  * other devices are passed to our parent.  If that first attempt fails
  * and the request names an exact range (start + count - 1 == end), a
  * sub-allocation from the system resource regions is tried through
  * acpi_alloc_sysres().  Returns the resource, or NULL on failure.
  */
 static struct resource *
 acpi_alloc_resource(device_t bus, device_t child, int type, int *rid,
     u_long start, u_long end, u_long count, u_int flags)
 {
     ACPI_RESOURCE ares;
     struct acpi_device *ad;
     struct resource_list_entry *rle;
     struct resource_list *rl;
     struct resource *res;
     /* True when the caller asked for the wildcard range 0 .. ~0. */
     int isdefault = (start == 0UL && end == ~0UL);
 
     /*
      * First attempt at allocating the resource.  For direct children,
      * use resource_list_alloc() to handle reserved resources.  For
      * other devices, pass the request up to our parent.
      */
     if (bus == device_get_parent(child)) {
 	ad = device_get_ivars(child);
 	rl = &ad->ad_rl;
 
 	/*
 	 * Simulate the behavior of the ISA bus for direct children
 	 * devices.  That is, if a non-default range is specified for
 	 * a resource that doesn't exist, use bus_set_resource() to
 	 * add the resource before allocating it.  Note that these
 	 * resources will not be reserved.
 	 */
 	if (!isdefault && resource_list_find(rl, type, *rid) == NULL)
 		resource_list_add(rl, type, *rid, start, end, count);
 	res = resource_list_alloc(rl, bus, child, type, rid, start, end, count,
 	    flags);
 	if (res != NULL && type == SYS_RES_IRQ) {
 	    /*
 	     * Since bus_config_intr() takes immediate effect, we cannot
 	     * configure the interrupt associated with a device when we
 	     * parse the resources but have to defer it until a driver
 	     * actually allocates the interrupt via bus_alloc_resource().
 	     *
 	     * XXX: Should we handle the lookup failing?
 	     */
 	    if (ACPI_SUCCESS(acpi_lookup_irq_resource(child, *rid, res, &ares)))
 		acpi_config_intr(child, &ares);
 	}
 
 	/*
 	 * If this is an allocation of the "default" range for a given
 	 * RID, fetch the exact bounds for this resource from the
 	 * resource list entry to try to allocate the range from the
 	 * system resource regions.
 	 */
 	if (res == NULL && isdefault) {
 	    rle = resource_list_find(rl, type, *rid);
 	    if (rle != NULL) {
 		start = rle->start;
 		end = rle->end;
 		count = rle->count;
 	    }
 	}
     } else
 	res = BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid,
 	    start, end, count, flags);
 
     /*
      * If the first attempt failed and this is an allocation of a
      * specific range, try to satisfy the request via a suballocation
      * from our system resource regions.
      */
     if (res == NULL && start + count - 1 == end)
 	res = acpi_alloc_sysres(child, type, rid, start, end, count, flags);
     return (res);
 }
 
 /*
  * Try to carve a specific range out of the system resource regions.
  *
  * Only I/O port and memory requests are handled; any other type yields
  * NULL.  The range must be exact (start + count - 1 == end).  If
  * RF_ACTIVE was requested, the resource is reserved inactive first and
  * then activated through bus_activate_resource(); if activation fails,
  * the reservation is released and NULL is returned.
  */
 struct resource *
 acpi_alloc_sysres(device_t child, int type, int *rid, u_long start, u_long end,
     u_long count, u_int flags)
 {
     struct rman *pool;
     struct resource *r;
 
     if (type == SYS_RES_IOPORT)
 	pool = &acpi_rman_io;
     else if (type == SYS_RES_MEMORY)
 	pool = &acpi_rman_mem;
     else
 	return (NULL);
 
     KASSERT(start + count - 1 == end, ("wildcard resource range"));
     r = rman_reserve_resource(pool, start, end, count, flags & ~RF_ACTIVE,
 	child);
     if (r == NULL)
 	return (NULL);
     rman_set_rid(r, *rid);
 
     /* Activation, when asked for, goes through the parent's method. */
     if ((flags & RF_ACTIVE) != 0 &&
 	bus_activate_resource(child, type, *rid, r) != 0) {
 	rman_release_resource(r);
 	return (NULL);
     }
 
     return (r);
 }
 
 /*
  * Report whether a resource was handed out by one of our own rman pools.
  * Only I/O port and memory resources go through those pools; every other
  * type yields 0.
  */
 static int
 acpi_is_resource_managed(int type, struct resource *r)
 {
 
     if (type == SYS_RES_IOPORT)
 	return (rman_is_region_manager(r, &acpi_rman_io));
     if (type == SYS_RES_MEMORY)
 	return (rman_is_region_manager(r, &acpi_rman_mem));
     return (0);
 }
 
 /*
  * Adjust the bounds of a resource: locally when it comes from one of our
  * rman pools, otherwise through the generic bus implementation.
  */
 static int
 acpi_adjust_resource(device_t bus, device_t child, int type, struct resource *r,
     u_long start, u_long end)
 {
 
     if (!acpi_is_resource_managed(type, r))
 	return (bus_generic_adjust_resource(bus, child, type, r, start, end));
     return (rman_adjust_resource(r, start, end));
 }
 
 /*
  * Release a resource held by a child.
  *
  * Resources that belong to one of our internal rman pools are deactivated
  * (if active) and handed back to the pool; everything else goes through
  * the generic resource-list release path.  Returns 0 or an errno value.
  */
 static int
 acpi_release_resource(device_t bus, device_t child, int type, int rid,
     struct resource *r)
 {
     int error;
 
     if (!acpi_is_resource_managed(type, r))
 	return (bus_generic_rl_release_resource(bus, child, type, rid, r));
 
     if ((rman_get_flags(r) & RF_ACTIVE) != 0) {
 	error = bus_deactivate_resource(child, type, rid, r);
 	if (error != 0)
 	    return (error);
     }
     return (rman_release_resource(r));
 }
 
 /*
  * Remove a resource entry from a child's resource list.  An entry that is
  * still allocated is left alone and a diagnostic is printed; otherwise
  * the entry is unreserved and then deleted.
  */
 static void
 acpi_delete_resource(device_t bus, device_t child, int type, int rid)
 {
     struct resource_list *list = acpi_get_rlist(bus, child);
 
     if (!resource_list_busy(list, type, rid)) {
 	resource_list_unreserve(list, bus, child, type, rid);
 	resource_list_delete(list, type, rid);
 	return;
     }
 
     device_printf(bus, "delete_resource: Resource still owned by child"
 	" (type=%d, rid=%d)\n", type, rid);
 }
 
 /*
  * Allocate the I/O port or memory resource described by a Generic Address
  * Structure (GAS).
  *
  * On success *res holds the active resource, *type its resource type, and
  * 0 is returned.  Errors: EINVAL for a NULL argument or a zero
  * address/width, EOPNOTSUPP for an address space other than system
  * memory or system I/O, ENOMEM if the allocation itself fails.  Note that
  * a non-zero BitWidth below 8 is rounded up to 8 in the caller's GAS.
  */
 int
 acpi_bus_alloc_gas(device_t dev, int *type, int *rid, ACPI_GENERIC_ADDRESS *gas,
     struct resource **res, u_int flags)
 {
     int res_type;
 
     if (type == NULL || rid == NULL || gas == NULL || res == NULL)
 	return (EINVAL);
 
     /* Only the memory and I/O address spaces are supported. */
     if (gas->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
 	res_type = SYS_RES_MEMORY;
     else if (gas->SpaceId == ACPI_ADR_SPACE_SYSTEM_IO)
 	res_type = SYS_RES_IOPORT;
     else
 	return (EOPNOTSUPP);
 
     /*
      * A width below one byte is assumed to mean the BIOS author is
      * describing a bit field; allocate a whole byte for it.
      */
     if (gas->BitWidth != 0 && gas->BitWidth < 8)
 	gas->BitWidth = 8;
 
     /* The address is validated only once the space is known to be ours. */
     if (gas->Address == 0 || gas->BitWidth == 0)
 	return (EINVAL);
 
     bus_set_resource(dev, res_type, *rid, gas->Address, gas->BitWidth / 8);
     *res = bus_alloc_resource_any(dev, res_type, rid, RF_ACTIVE | flags);
     if (*res == NULL) {
 	/* Undo the bus_set_resource() above. */
 	bus_delete_resource(dev, res_type, *rid);
 	return (ENOMEM);
     }
 
     *type = res_type;
     return (0);
 }
 
 /*
  * Return the device's _HID encoded as an ISA PnP (EISA) id, or 0 when the
  * device has no handle, its object info cannot be read, or it has no
  * valid _HID of sufficient length.
  */
 static uint32_t
 acpi_isa_get_logicalid(device_t dev)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_HANDLE		handle;
     uint32_t		id;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     handle = acpi_get_handle(dev);
     if (handle == NULL)
 	return_VALUE (0);
     if (ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return_VALUE (0);
 
     id = 0;
     if ((info->Valid & ACPI_VALID_HID) != 0 &&
 	info->HardwareId.Length >= ACPI_EISAID_STRING_SIZE)
 	id = PNP_EISAID(info->HardwareId.String);
     AcpiOsFree(info);
 
     return_VALUE (id);
 }
 
 /*
  * Collect the device's "PNP"-prefixed compatible ids (_CID) encoded as ISA
  * PnP ids.
  *
  * At most 'count' entries of the device's compatible-id list are examined;
  * matching ones are stored consecutively in 'cids'.  Returns the number
  * of ids stored, which is 0 when the device has no handle, no readable
  * object info, or no valid _CID.
  */
 static int
 acpi_isa_get_compatid(device_t dev, uint32_t *cids, int count)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_PNP_DEVICE_ID	*cid;
     ACPI_HANDLE		handle;
     int			found, n;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     handle = acpi_get_handle(dev);
     if (handle == NULL || ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return_VALUE (0);
 
     found = 0;
     if ((info->Valid & ACPI_VALID_CID) != 0) {
 	/* Never look past the end of the device's own list. */
 	if (info->CompatibleIdList.Count < count)
 	    count = info->CompatibleIdList.Count;
 	cid = info->CompatibleIdList.Ids;
 	for (n = 0; n < count; n++, cid++) {
 	    if (cid->Length < ACPI_EISAID_STRING_SIZE ||
 		strncmp(cid->String, "PNP", 3) != 0)
 		continue;
 	    cids[found++] = PNP_EISAID(cid->String);
 	}
     }
     AcpiOsFree(info);
 
     return_VALUE (found);
 }
 
 /*
  * Match a device against a NULL-terminated list of ids.
  *
  * Only device and processor objects with a valid handle are considered.
  * Returns the first entry of 'ids' accepted by acpi_MatchHid(), or NULL
  * if there is none.
  */
 static char *
 acpi_device_id_probe(device_t bus, device_t dev, char **ids) 
 {
     ACPI_HANDLE handle;
     ACPI_OBJECT_TYPE type;
     char **idp;
 
     handle = acpi_get_handle(dev);
     if (ids == NULL || handle == NULL)
 	return (NULL);
     type = acpi_get_type(dev);
     if (type != ACPI_TYPE_DEVICE && type != ACPI_TYPE_PROCESSOR)
 	return (NULL);
 
     /* Walk the caller's list and stop at the first id that matches. */
     for (idp = ids; *idp != NULL; idp++) {
 	if (acpi_MatchHid(handle, *idp))
 	    return (*idp);
     }
     return (NULL);
 }
 
 /*
  * Evaluate an ACPI object relative to a device.  A NULL device means the
  * path is evaluated from the namespace root; a device without a handle
  * yields AE_BAD_PARAMETER.
  */
 static ACPI_STATUS
 acpi_device_eval_obj(device_t bus, device_t dev, ACPI_STRING pathname,
     ACPI_OBJECT_LIST *parameters, ACPI_BUFFER *ret)
 {
     ACPI_HANDLE handle;
 
     if (dev != NULL) {
 	handle = acpi_get_handle(dev);
 	if (handle == NULL)
 	    return (AE_BAD_PARAMETER);
     } else
 	handle = ACPI_ROOT_OBJECT;
 
     return (AcpiEvaluateObject(handle, pathname, parameters, ret));
 }
 
 /*
  * Determine the device power state to use for the pending sleep state.
  *
  * *dstate is overridden with the value of the device's _SxD object when
  * one exists (x being the softc's current sleep state); a missing _SxD is
  * not an error and leaves *dstate alone.  Returns 0 on success and ENXIO
  * for a NULL argument/handle, a blacklisted device, or an evaluation
  * failure.
  */
 int
 acpi_device_pwr_for_sleep(device_t bus, device_t dev, int *dstate)
 {
     /*
      * XXX Devices we never try to power down.  The serial and IRDA
      * ports on a T23 hang the system when set to D3, and it appears
      * that such legacy devices may need special handling in their
      * drivers.
      */
     static char *skip_ids[] = {
 	"PNP0500", "PNP0501", "PNP0502", "PNP0510", "PNP0511", NULL
     };
     struct acpi_softc *softc;
     ACPI_HANDLE h;
     ACPI_STATUS rv;
     char method[8];
     int i;
 
     h = acpi_get_handle(dev);
     if (dstate == NULL || h == NULL)
 	return (ENXIO);
     for (i = 0; skip_ids[i] != NULL; i++) {
 	if (acpi_MatchHid(h, skip_ids[i]))
 	    return (ENXIO);
     }
 
     /*
      * Override next state with the value from _SxD, if present.
      * Note illegal _S0D is evaluated because some systems expect this.
      */
     softc = device_get_softc(bus);
     snprintf(method, sizeof(method), "_S%dD", softc->acpi_sstate);
     rv = acpi_GetInteger(h, method, dstate);
     if (ACPI_SUCCESS(rv) || rv == AE_NOT_FOUND)
 	return (0);
 
     device_printf(dev, "failed to get %s on %s: %s\n", method,
 	acpi_name(h), AcpiFormatException(rv));
     return (ENXIO);
 }
 
 /*
  * Callback arg for our implementation of walking the namespace.  Filled in
  * by acpi_device_scan_children() and consumed by acpi_device_scan_cb().
  */
 struct acpi_device_scan_ctx {
     acpi_scan_cb_t	user_fn;	/* caller's per-node callback */
     void		*arg;		/* opaque argument handed to user_fn */
     ACPI_HANDLE		parent;		/* node where the scan began; skipped */
 };
 
 /*
  * Namespace walk callback used by acpi_device_scan_children().
  *
  * Filters out nodes we avoid, the scan's starting node, and objects that
  * are not device, processor, thermal or power objects.  For the rest it
  * invokes the user's callback and, if the callback replaced the device_t
  * bound to the handle, updates that binding.
  */
 static ACPI_STATUS
 acpi_device_scan_cb(ACPI_HANDLE h, UINT32 level, void *arg, void **retval)
 {
     struct acpi_device_scan_ctx *scan = arg;
     device_t before, after;
     ACPI_OBJECT_TYPE objtype;
     ACPI_STATUS rv;
 
     /* Skip troublesome nodes and the node the scan started from. */
     if (acpi_avoid(h) || h == scan->parent)
 	return (AE_OK);
 
     /* Skip anything that is not a device-like object (e.g. a method). */
     if (ACPI_FAILURE(AcpiGetType(h, &objtype)))
 	return (AE_OK);
     switch (objtype) {
     case ACPI_TYPE_DEVICE:
     case ACPI_TYPE_PROCESSOR:
     case ACPI_TYPE_THERMAL:
     case ACPI_TYPE_POWER:
 	break;
     default:
 	return (AE_OK);
     }
 
     /*
      * Let the user function see the device currently bound to the
      * handle.  Nothing more to do if it failed or left it unchanged.
      */
     before = acpi_get_device(h);
     after = before;
     rv = scan->user_fn(h, &after, level, scan->arg);
     if (ACPI_FAILURE(rv) || after == before)
 	return (rv);
 
     /* Drop the previous child together with its link to the handle. */
     if (before != NULL) {
 	device_delete_child(device_get_parent(before), before);
 	AcpiDetachData(h, acpi_fake_objhandler);
     }
 
     /* Bind the handle to the device the user created, if any. */
     if (after != NULL)
 	AcpiAttachData(h, acpi_fake_objhandler, after);
 
     return (AE_OK);
 }
 
 /*
  * Walk the namespace below a device (or below the root when dev is NULL),
  * down to max_depth levels, calling user_fn for every eligible node.
  * Does nothing and returns AE_OK when "children" is disabled; returns
  * AE_BAD_PARAMETER for a device without a handle.
  */
 static ACPI_STATUS
 acpi_device_scan_children(device_t bus, device_t dev, int max_depth,
     acpi_scan_cb_t user_fn, void *arg)
 {
     struct acpi_device_scan_ctx scan;
     ACPI_HANDLE root;
 
     if (acpi_disabled("children"))
 	return (AE_OK);
 
     if (dev != NULL) {
 	root = acpi_get_handle(dev);
 	if (root == NULL)
 	    return (AE_BAD_PARAMETER);
     } else
 	root = ACPI_ROOT_OBJECT;
 
     scan.parent = root;
     scan.user_fn = user_fn;
     scan.arg = arg;
     return (AcpiWalkNamespace(ACPI_TYPE_ANY, root, max_depth,
 	acpi_device_scan_cb, NULL, &scan, NULL));
 }
 
 /*
  * Set the power state of a child device.
  *
  * Even though ACPI devices are not PCI, the PCI approach to device power
  * states is used since it's close enough to ACPI.  Returns EINVAL for a
  * state outside D0 .. ACPI_D_STATES_MAX and 0 otherwise; failures of the
  * switch itself are only logged.
  */
 static int
 acpi_set_powerstate(device_t child, int state)
 {
     ACPI_HANDLE handle;
     ACPI_STATUS rv;
 
     handle = acpi_get_handle(child);
     if (state < ACPI_STATE_D0 || state > ACPI_D_STATES_MAX)
 	return (EINVAL);
     if (handle == NULL)
 	return (0);
 
     /* Missing power methods (AE_NOT_FOUND) are silently tolerated. */
     rv = acpi_pwr_switch_consumer(handle, state);
     if (ACPI_FAILURE(rv)) {
 	if (rv != AE_NOT_FOUND)
 	    device_printf(child,
 		"failed to set ACPI power state D%d on %s: %s\n", state,
 		acpi_name(handle), AcpiFormatException(rv));
     } else if (bootverbose)
 	device_printf(child, "set ACPI power state D%d on %s\n",
 	    state, acpi_name(handle));
 
     return (0);
 }
 
 /*
  * Match a child's logical id (_HID) and compatible ids (_CID) against a
  * zero-terminated table of ISA PnP ids.
  *
  * Returns 0 on the first table entry that matches, after setting the
  * device description from that entry when it has one; returns ENXIO when
  * nothing matches or no table was supplied.
  */
 static int
 acpi_isa_pnp_probe(device_t bus, device_t child, struct isa_pnp_id *ids)
 {
     uint32_t		hid, compat[8];
     int			error, hit, n, ncompat;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /*
      * ISA-style drivers attached to ACPI may persist and
      * probe manually if we return ENOENT.  We never want
      * that to happen, so don't ever return it.
      */
     error = ENXIO;
 
     hid = acpi_isa_get_logicalid(child);
     ncompat = acpi_isa_get_compatid(child, compat, 8);
 
     /* Check each supplied id against the _HID first, then the _CIDs. */
     for (; ids != NULL && ids->ip_id != 0; ids++) {
 	hit = (hid == ids->ip_id);
 	for (n = 0; !hit && n < ncompat; n++)
 	    hit = (compat[n] == ids->ip_id);
 	if (hit) {
 	    error = 0;
 	    if (ids->ip_desc != NULL)
 		device_set_desc(child, ids->ip_desc);
 	    break;
 	}
     }
 
     return_VALUE (error);
 }
 
 #if defined(__i386__) || defined(__amd64__)
 /*
  * Look for a MCFG table.  If it is present, use the settings for
  * domain (segment) 0 to setup PCI config space access via the memory
  * map.
  */
 static void
 acpi_enable_pcie(void)
 {
 	ACPI_TABLE_HEADER *hdr;
 	ACPI_MCFG_ALLOCATION *alloc, *end;
 	ACPI_STATUS status;
 
 	status = AcpiGetTable(ACPI_SIG_MCFG, 1, &hdr);
 	if (ACPI_FAILURE(status))
 		return;
 
 	end = (ACPI_MCFG_ALLOCATION *)((char *)hdr + hdr->Length);
 	alloc = (ACPI_MCFG_ALLOCATION *)((ACPI_TABLE_MCFG *)hdr + 1);
 	while (alloc < end) {
 		if (alloc->PciSegment == 0) {
 			pcie_cfgregopen(alloc->Address, alloc->StartBusNumber,
 			    alloc->EndBusNumber);
 			return;
 		}
 		alloc++;
 	}
 }
 #endif
 
 /*
  * Scan all of the ACPI namespace and attach child devices.
  *
  * We should only expect to find devices in the \_PR, \_TZ, \_SI, and
  * \_SB scopes, and \_PR and \_TZ became obsolete in the ACPI 2.0 spec.
  * However, in violation of the spec, some systems place their PCI link
  * devices in \, so we have to walk the whole namespace.  We check the
  * type of namespace nodes, so this should be ok.
  *
  * The steps below run in a fixed order: namespace scan, system resource
  * pre-allocation, reservation of child resources, device identify
  * routines, probe/attach of all children, and finally the wake sysctls.
  */
 static void
 acpi_probe_children(device_t bus)
 {
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /*
      * Scan the namespace and insert placeholders for all the devices that
      * we find.  We also probe/attach any early devices.
      *
      * Note that we use AcpiWalkNamespace rather than AcpiGetDevices because
      * we want to create nodes for all devices, not just those that are
      * currently present. (This assumes that we don't want to create/remove
      * devices as they appear, which might be smarter.)
      */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "namespace scan\n"));
     /* Walk from the root, at most 100 levels deep; 'bus' is the context. */
     AcpiWalkNamespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, 100, acpi_probe_child,
 	NULL, bus, NULL);
 
     /* Pre-allocate resources for our rman from any sysresource devices. */
     acpi_sysres_alloc(bus);
 
     /* Reserve resources already allocated to children. */
     acpi_reserve_resources(bus);
 
     /* Create any static children by calling device identify methods. */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "device identify routines\n"));
     bus_generic_probe(bus);
 
     /* Probe/attach all children, created statically and from the namespace. */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "acpi bus_generic_attach\n"));
     bus_generic_attach(bus);
 
     /* Attach wake sysctls. */
     acpi_wake_sysctl_walk(bus);
 
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "done attaching children\n"));
     return_VOID;
 }
 
 /*
  * Determine the probe order for a given device.
  *
  * If the object is one of the special classes listed below, *order is
  * overwritten with that class's rank.  Otherwise *order is left as the
  * caller set it.
  */
 static void
 acpi_probe_order(ACPI_HANDLE handle, int *order)
 {
 	ACPI_OBJECT_TYPE type;
 
 	/*
 	 * 0. CPUs
 	 * 1. I/O port and memory system resource holders
 	 * 2. Clocks and timers (to handle early accesses)
 	 * 3. Embedded controllers (to handle early accesses)
 	 * 4. PCI Link Devices
 	 */
 
 	/*
 	 * Don't read 'type' uninitialized if the lookup fails; treat an
 	 * unknown type as "not a processor" and fall back to HID matching.
 	 */
 	if (ACPI_FAILURE(AcpiGetType(handle, &type)))
 		type = ACPI_TYPE_ANY;
 
 	if (type == ACPI_TYPE_PROCESSOR)
 		*order = 0;
 	else if (acpi_MatchHid(handle, "PNP0C01") ||
 	    acpi_MatchHid(handle, "PNP0C02"))
 		*order = 1;
 	else if (acpi_MatchHid(handle, "PNP0100") ||
 	    acpi_MatchHid(handle, "PNP0103") ||
 	    acpi_MatchHid(handle, "PNP0B00"))
 		*order = 2;
 	else if (acpi_MatchHid(handle, "PNP0C09"))
 		*order = 3;
 	else if (acpi_MatchHid(handle, "PNP0C0F"))
 		*order = 4;
 }
 
 /*
  * Evaluate a child device and determine whether we might attach a device to
  * it.
  *
  * This is the AcpiWalkNamespace() callback used by acpi_probe_children();
  * 'context' is the ACPI bus device.  It always returns AE_OK so that the
  * walk continues regardless of what happens with an individual node.
  */
 static ACPI_STATUS
 acpi_probe_child(ACPI_HANDLE handle, UINT32 level, void *context, void **status)
 {
     struct acpi_prw_data prw;
     ACPI_OBJECT_TYPE type;
     ACPI_HANDLE h;
     device_t bus, child;
     char *handle_str;
     int order;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (acpi_disabled("children"))
 	return_ACPI_STATUS (AE_OK);
 
     /* Skip this device if we think we'll have trouble with it. */
     if (acpi_avoid(handle))
 	return_ACPI_STATUS (AE_OK);
 
     bus = (device_t)context;
     if (ACPI_SUCCESS(AcpiGetType(handle, &type))) {
 	handle_str = acpi_name(handle);
 	switch (type) {
 	case ACPI_TYPE_DEVICE:
 	    /*
 	     * Since we scan from \, be sure to skip system scope objects.
 	     * \_SB_ and \_TZ_ are defined in ACPICA as devices to work around
 	     * BIOS bugs.  For example, \_SB_ is to allow \_SB_._INI to be run
 	     * during the initialization and \_TZ_ is to support Notify() on it.
 	     */
 	    if (strcmp(handle_str, "\\_SB_") == 0 ||
 		strcmp(handle_str, "\\_TZ_") == 0)
 		break;
 	    /* If the wake (_PRW) data parses, set up its GPE for wake. */
 	    if (acpi_parse_prw(handle, &prw) == 0)
 		AcpiSetupGpeForWake(handle, prw.gpe_handle, prw.gpe_bit);
 
 	    /*
 	     * Ignore devices that do not have a _HID or _CID.  They should
 	     * be discovered by other buses (e.g. the PCI bus driver).
 	     */
 	    if (!acpi_has_hid(handle))
 		break;
 	    /* FALLTHROUGH */
 	case ACPI_TYPE_PROCESSOR:
 	case ACPI_TYPE_THERMAL:
 	case ACPI_TYPE_POWER:
 	    /*
 	     * Create a placeholder device for this node.  Sort the
 	     * placeholder so that the probe/attach passes will run
 	     * breadth-first.  Orders less than ACPI_DEV_BASE_ORDER
 	     * are reserved for special objects (i.e., system
 	     * resources).
 	     */
 	    ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "scanning '%s'\n", handle_str));
 	    /* Default order by depth; acpi_probe_order() may override it. */
 	    order = level * 10 + ACPI_DEV_BASE_ORDER;
 	    acpi_probe_order(handle, &order);
 	    child = BUS_ADD_CHILD(bus, order, NULL, -1);
 	    if (child == NULL)
 		break;
 
 	    /* Associate the handle with the device_t and vice versa. */
 	    acpi_set_handle(child, handle);
 	    AcpiAttachData(handle, acpi_fake_objhandler, child);
 
 	    /*
 	     * Check that the device is present.  If it's not present,
 	     * leave it disabled (so that we have a device_t attached to
 	     * the handle, but we don't probe it).
 	     *
 	     * XXX PCI link devices sometimes report "present" but not
 	     * "functional" (i.e. if disabled).  Go ahead and probe them
 	     * anyway since we may enable them later.
 	     */
 	    if (type == ACPI_TYPE_DEVICE && !acpi_DeviceIsPresent(child)) {
 		/* Never disable PCI link devices. */
 		if (acpi_MatchHid(handle, "PNP0C0F"))
 		    break;
 		/*
 		 * Docking stations should remain enabled since the system
 		 * may be undocked at boot.
 		 */
 		if (ACPI_SUCCESS(AcpiGetHandle(handle, "_DCK", &h)))
 		    break;
 
 		device_disable(child);
 		break;
 	    }
 
 	    /*
 	     * Get the device's resource settings and attach them.
 	     * Note that if the device has _PRS but no _CRS, we need
 	     * to decide when it's appropriate to try to configure the
 	     * device.  Ignore the return value here; it's OK for the
 	     * device not to have any resources.
 	     */
 	    acpi_parse_resources(child, handle, &acpi_res_parse_set, NULL);
 	    break;
 	}
     }
 
     return_ACPI_STATUS (AE_OK);
 }
 
 /*
  * AcpiAttachData() requires an object handler but never uses it.  This is a
  * placeholder object handler so we can store a device_t in an ACPI_HANDLE.
  * It intentionally does nothing with either argument.
  */
 void
 acpi_fake_objhandler(ACPI_HANDLE h, void *data)
 {
 }
 
 /*
  * Final-stage shutdown handler.
  *
  * 'arg' is the ACPI softc and 'howto' carries the reboot flags.  Depending
  * on those flags this attempts, in order of preference: an S5 power-off
  * (RB_POWEROFF), a reset through AcpiReset() (not halting and
  * acpi_handle_reboot set), or an AcpiTerminate() (acpi_do_disable set and
  * not panicking).  Each failure path prints a diagnostic and returns.
  */
 static void
 acpi_shutdown_final(void *arg, int howto)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
     register_t intr;
     ACPI_STATUS status;
 
     /*
      * XXX Shutdown code should only run on the BSP (cpuid 0).
      * Some chipsets do not power off the system correctly if called from
      * an AP.
      */
     if ((howto & RB_POWEROFF) != 0) {
 	status = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(sc->acpi_dev, "AcpiEnterSleepStatePrep failed - %s\n",
 		AcpiFormatException(status));
 	    return;
 	}
 	device_printf(sc->acpi_dev, "Powering system off\n");
 	/* Interrupts stay disabled across the attempt to enter S5. */
 	intr = intr_disable();
 	status = AcpiEnterSleepState(ACPI_STATE_S5);
 	if (ACPI_FAILURE(status)) {
 	    intr_restore(intr);
 	    device_printf(sc->acpi_dev, "power-off failed - %s\n",
 		AcpiFormatException(status));
 	} else {
 	    /* The call succeeded but we are still running: wait, then report. */
 	    DELAY(1000000);
 	    intr_restore(intr);
 	    device_printf(sc->acpi_dev, "power-off failed - timeout\n");
 	}
     } else if ((howto & RB_HALT) == 0 && sc->acpi_handle_reboot) {
 	/* Reboot using the reset register. */
 	status = AcpiReset();
 	if (ACPI_SUCCESS(status)) {
 	    DELAY(1000000);
 	    device_printf(sc->acpi_dev, "reset failed - timeout\n");
 	} else if (status != AE_NOT_EXIST)
 	    device_printf(sc->acpi_dev, "reset failed - %s\n",
 		AcpiFormatException(status));
     } else if (sc->acpi_do_disable && panicstr == NULL) {
 	/*
 	 * Only disable ACPI if the user requested.  On some systems, writing
 	 * the disable value to SMI_CMD hangs the system.
 	 */
 	device_printf(sc->acpi_dev, "Shutting down\n");
 	AcpiTerminate();
     }
 }
 
 /*
  * For each of the power and sleep buttons whose ACPI_FADT_*_BUTTON flag is
  * clear in the FADT, clear the pending fixed event and install the matching
  * handler.  The "(fixed)" console message is printed on the first call only.
  */
 static void
 acpi_enable_fixed_events(struct acpi_softc *sc)
 {
     static int	announce = 1;
 
     if (!(AcpiGbl_FADT.Flags & ACPI_FADT_POWER_BUTTON)) {
 	AcpiClearEvent(ACPI_EVENT_POWER_BUTTON);
 	AcpiInstallFixedEventHandler(ACPI_EVENT_POWER_BUTTON,
 	    acpi_event_power_button_sleep, sc);
 	if (announce != 0)
 	    device_printf(sc->acpi_dev, "Power Button (fixed)\n");
     }
 
     if (!(AcpiGbl_FADT.Flags & ACPI_FADT_SLEEP_BUTTON)) {
 	AcpiClearEvent(ACPI_EVENT_SLEEP_BUTTON);
 	AcpiInstallFixedEventHandler(ACPI_EVENT_SLEEP_BUTTON,
 	    acpi_event_sleep_button_sleep, sc);
 	if (announce != 0)
 	    device_printf(sc->acpi_dev, "Sleep Button (fixed)\n");
     }
 
     /* Subsequent calls stay quiet. */
     announce = 0;
 }
 
 /*
  * Decide whether a device should be treated as present.
  *
  * Returns FALSE if the device has no ACPI handle or its object info cannot
  * be fetched.  A device without a _STA method is always considered present;
  * otherwise the answer is ACPI_DEVICE_PRESENT() of its current status.
  */
 BOOLEAN
 acpi_DeviceIsPresent(device_t dev)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_HANDLE		handle;
     BOOLEAN		result;
 
     handle = acpi_get_handle(dev);
     if (handle == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return (FALSE);
 
     if ((info->Valid & ACPI_VALID_STA) == 0)
 	result = TRUE;		/* no _STA method */
     else if (ACPI_DEVICE_PRESENT(info->CurrentStatus))
 	result = TRUE;
     else
 	result = FALSE;
 
     AcpiOsFree(info);
     return (result);
 }
 
 /*
  * Decide whether a battery should be treated as present and inserted.
  *
  * Returns FALSE if the device has no ACPI handle or its object info cannot
  * be fetched.  A battery without a _STA method is always considered present;
  * otherwise the answer is ACPI_BATTERY_PRESENT() of its current status.
  */
 BOOLEAN
 acpi_BatteryIsPresent(device_t dev)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_HANDLE		handle;
     BOOLEAN		result;
 
     handle = acpi_get_handle(dev);
     if (handle == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return (FALSE);
 
     if ((info->Valid & ACPI_VALID_STA) == 0)
 	result = TRUE;		/* no _STA method */
     else if (ACPI_BATTERY_PRESENT(info->CurrentStatus))
 	result = TRUE;
     else
 	result = FALSE;
 
     AcpiOsFree(info);
     return (result);
 }
 
 /*
  * Report whether the object behind a handle carries any device ID: either
  * a valid _HID, or a valid _CID list holding at least one entry.  A NULL
  * handle or a failed info lookup yields FALSE.
  */
 static BOOLEAN
 acpi_has_hid(ACPI_HANDLE h)
 {
     ACPI_DEVICE_INFO	*info;
     BOOLEAN		found;
 
     if (h == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(h, &info)))
 	return (FALSE);
 
     if ((info->Valid & ACPI_VALID_HID) != 0)
 	found = TRUE;
     else if ((info->Valid & ACPI_VALID_CID) != 0 &&
 	info->CompatibleIdList.Count > 0)
 	found = TRUE;
     else
 	found = FALSE;
 
     AcpiOsFree(info);
     return (found);
 }
 
 /*
  * Test whether the object behind a handle identifies itself as `hid'.
  *
  * The hardware ID is compared first; if it is absent or differs, every
  * entry of the compatible ID list (when valid) is compared in turn.
  * Returns FALSE for a NULL hid or handle, or if the info lookup fails.
  */
 BOOLEAN
 acpi_MatchHid(ACPI_HANDLE h, const char *hid) 
 {
     ACPI_DEVICE_INFO	*info;
     BOOLEAN		matched;
     int			idx;
 
     if (hid == NULL || h == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(h, &info)))
 	return (FALSE);
 
     matched = FALSE;
     if ((info->Valid & ACPI_VALID_HID) != 0 &&
 	strcmp(hid, info->HardwareId.String) == 0) {
 	matched = TRUE;
     } else if ((info->Valid & ACPI_VALID_CID) != 0) {
 	/* Stop at the first compatible ID that matches. */
 	idx = 0;
 	while (!matched && idx < info->CompatibleIdList.Count) {
 	    if (strcmp(hid, info->CompatibleIdList.Ids[idx].String) == 0)
 		matched = TRUE;
 	    else
 		idx++;
 	}
     }
 
     AcpiOsFree(info);
     return (matched);
 }
 
 /*
  * Return the handle of a named object within our scope, i.e. that of
  * (parent) or one of its parents.
  *
  * The lookup is tried at `parent' first and then at each ancestor in turn.
  * Returns AE_OK with *result set on success, AE_NOT_FOUND once the walk runs
  * out of parents, or the AcpiGetHandle() status if a lookup fails for any
  * reason other than AE_NOT_FOUND.  *result is written only on success.
  */
 ACPI_STATUS
 acpi_GetHandleInScope(ACPI_HANDLE parent, char *path, ACPI_HANDLE *result)
 {
     ACPI_HANDLE		r;
     ACPI_STATUS		status;
 
     /* Walk back up the tree to the root */
     for (;;) {
 	status = AcpiGetHandle(parent, path, &r);
 	if (ACPI_SUCCESS(status)) {
 	    *result = r;
 	    return (AE_OK);
 	}
 	/*
 	 * Any failure other than "not found" is a real error.  Report it
 	 * instead of claiming success while leaving *result unset.
 	 */
 	if (status != AE_NOT_FOUND)
 	    return (status);
 	if (ACPI_FAILURE(AcpiGetParent(parent, &r)))
 	    return (AE_NOT_FOUND);
 	parent = r;
     }
 }
 
 /*
  * Allocate an ACPI_BUFFER together with `size' bytes of data in a single
  * M_NOWAIT allocation.  Length is preset to `size' and Pointer refers to
  * the storage immediately following the header.  Returns NULL if the
  * allocation fails.
  */
 ACPI_BUFFER *
 acpi_AllocBuffer(int size)
 {
     ACPI_BUFFER	*buf;
 
     buf = malloc(size + sizeof(*buf), M_ACPIDEV, M_NOWAIT);
     if (buf != NULL) {
 	buf->Length = size;
 	buf->Pointer = (void *)(buf + 1);
     }
     return (buf);
 }
 
 /*
  * Evaluate `path' relative to `handle', passing `number' as its single
  * Integer argument.  Any return value of the evaluation is discarded; the
  * AcpiEvaluateObject() status is returned.
  */
 ACPI_STATUS
 acpi_SetInteger(ACPI_HANDLE handle, char *path, UINT32 number)
 {
     ACPI_OBJECT		value;
     ACPI_OBJECT_LIST	arglist;
     ACPI_STATUS		status;
 
     value.Type = ACPI_TYPE_INTEGER;
     value.Integer.Value = number;
 
     arglist.Pointer = &value;
     arglist.Count = 1;
 
     status = AcpiEvaluateObject(handle, path, &arglist, NULL);
     return (status);
 }
 
 /*
  * Evaluate `path' relative to `handle' (the root object if handle is NULL)
  * and store the resulting integer in *number.
  *
  * The object is first evaluated into a single on-stack ACPI_OBJECT.  A
  * non-Integer result that fits yields AE_TYPE.  If the result does not fit
  * (AE_BUFFER_OVERFLOW), it is evaluated again into a heap buffer of the
  * reported size and handed to acpi_ConvertBufferToInteger().
  */
 ACPI_STATUS
 acpi_GetInteger(ACPI_HANDLE handle, char *path, UINT32 *number)
 {
     ACPI_OBJECT	obj;
     ACPI_BUFFER	out;
     ACPI_STATUS	rv;
 
     if (handle == NULL)
 	handle = ACPI_ROOT_OBJECT;
 
     /* First attempt: assume an Integer, or a method returning one. */
     out.Pointer = &obj;
     out.Length = sizeof(obj);
     rv = AcpiEvaluateObject(handle, path, NULL, &out);
     if (ACPI_SUCCESS(rv)) {
 	if (obj.Type != ACPI_TYPE_INTEGER)
 	    rv = AE_TYPE;
 	else
 	    *number = obj.Integer.Value;
     }
     if (rv != AE_BUFFER_OVERFLOW)
 	return (rv);
 
     /*
      * Some methods that are expected to return an Integer hand back a
      * Buffer instead.  Fetch whatever it is into a large enough buffer and
      * convert it as best we can.  This is a hack.
      */
     out.Pointer = AcpiOsAllocate(out.Length);
     if (out.Pointer == NULL)
 	return (AE_NO_MEMORY);
 
     rv = AcpiEvaluateObject(handle, path, NULL, &out);
     if (ACPI_SUCCESS(rv))
 	rv = acpi_ConvertBufferToInteger(&out, number);
     AcpiOsFree(out.Pointer);
     return (rv);
 }
 
 /*
  * Convert the ACPI_OBJECT held in *bufp to an integer stored in *number.
  *
  * An Integer object is copied directly.  A Buffer object no longer than
  * *number is assembled byte by byte, with byte 0 as the least significant.
  * Returns AE_TYPE for any other object type and AE_BAD_DATA for a Buffer
  * that is too long to fit.
  */
 ACPI_STATUS
 acpi_ConvertBufferToInteger(ACPI_BUFFER *bufp, UINT32 *number)
 {
     ACPI_OBJECT	*p;
     UINT8	*val;
     UINT32	i;
 
     p = (ACPI_OBJECT *)bufp->Pointer;
     if (p->Type == ACPI_TYPE_INTEGER) {
 	*number = p->Integer.Value;
 	return (AE_OK);
     }
     if (p->Type != ACPI_TYPE_BUFFER)
 	return (AE_TYPE);
     /* Bound the length by the destination, not by an unrelated type. */
     if (p->Buffer.Length > sizeof(*number))
 	return (AE_BAD_DATA);
 
     *number = 0;
     val = p->Buffer.Pointer;
     /*
      * Widen each byte to UINT32 before shifting.  A bare UINT8 promotes to
      * signed int, and shifting a value >= 0x80 left by 24 is undefined.
      */
     for (i = 0; i < p->Buffer.Length; i++)
 	*number |= (UINT32)val[i] << (i * 8);
     return (AE_OK);
 }
 
 /*
  * Call `func' once for every element of a package object, in order,
  * passing `arg' through unchanged.
  *
  * Returns AE_BAD_PARAMETER if pkg is NULL or is not a package, AE_OK
  * otherwise.
  *
  * XXX possible enhancement might be to abort traversal on error.
  */
 ACPI_STATUS
 acpi_ForeachPackageObject(ACPI_OBJECT *pkg,
 	void (*func)(ACPI_OBJECT *comp, void *arg), void *arg)
 {
     ACPI_OBJECT	*elems;
     int		idx;
 
     if (pkg == NULL)
 	return (AE_BAD_PARAMETER);
     if (pkg->Type != ACPI_TYPE_PACKAGE)
 	return (AE_BAD_PARAMETER);
 
     /* Visit each component in turn. */
     elems = pkg->Package.Elements;
     idx = 0;
     while (idx < pkg->Package.Count) {
 	func(&elems[idx], arg);
 	idx++;
     }
 
     return (AE_OK);
 }
 
 /*
  * Find the (index)th resource object in a set.
  *
  * Walks the resource list in `buf' from the start, skipping `index'
  * entries.  On success the resource reached is stored in *resp (if resp is
  * not NULL) and AE_OK is returned.  Returns AE_BAD_PARAMETER if the walk
  * reaches the end of the buffer and AE_NOT_FOUND if an end tag or a
  * zero-length resource is met before `index' entries have been skipped.
  */
 ACPI_STATUS
 acpi_FindIndexedResource(ACPI_BUFFER *buf, int index, ACPI_RESOURCE **resp)
 {
     ACPI_RESOURCE	*rp;
     int			i;
 
     rp = (ACPI_RESOURCE *)buf->Pointer;
     i = index;
     while (i-- > 0) {
 	/*
 	 * Range check.  A pointer equal to the end of the buffer is already
 	 * outside it, so it must not be dereferenced below.
 	 */
 	if (rp >= (ACPI_RESOURCE *)((u_int8_t *)buf->Pointer + buf->Length))
 	    return (AE_BAD_PARAMETER);
 
 	/* Check for terminator */
 	if (rp->Type == ACPI_RESOURCE_TYPE_END_TAG || rp->Length == 0)
 	    return (AE_NOT_FOUND);
 	rp = ACPI_NEXT_RESOURCE(rp);
     }
     if (resp != NULL)
 	*resp = rp;
 
     return (AE_OK);
 }
 
 /*
  * Append an ACPI_RESOURCE to an ACPI_BUFFER.
  *
  * Given a pointer to an ACPI_RESOURCE structure, expand the ACPI_BUFFER
  * provided to contain it.  If the ACPI_BUFFER is empty, allocate a sensible
  * backing block.  If the ACPI_RESOURCE is NULL, return an empty set of
  * resources.
  *
  * Returns AE_OK on success, AE_NO_MEMORY if an allocation fails, and
  * AE_BAD_PARAMETER if no terminator is found inside the existing buffer.
  * When the buffer is grown, the old storage is released with AcpiOsFree()
  * and buf->Pointer / buf->Length are updated to describe the new block.
  */
 #define ACPI_INITIAL_RESOURCE_BUFFER_SIZE	512
 
 ACPI_STATUS
 acpi_AppendBufferResource(ACPI_BUFFER *buf, ACPI_RESOURCE *res)
 {
     ACPI_RESOURCE	*rp;
     void		*newp;
 
     /* Initialise the buffer if necessary. */
     if (buf->Pointer == NULL) {
 	buf->Length = ACPI_INITIAL_RESOURCE_BUFFER_SIZE;
 	if ((buf->Pointer = AcpiOsAllocate(buf->Length)) == NULL)
 	    return (AE_NO_MEMORY);
 	/* A fresh buffer holds just the end tag. */
 	rp = (ACPI_RESOURCE *)buf->Pointer;
 	rp->Type = ACPI_RESOURCE_TYPE_END_TAG;
 	rp->Length = ACPI_RS_SIZE_MIN;
     }
     if (res == NULL)
 	return (AE_OK);
 
     /*
      * Scan the current buffer looking for the terminator.
      * This will either find the terminator or hit the end
      * of the buffer and return an error.
      */
     rp = (ACPI_RESOURCE *)buf->Pointer;
     for (;;) {
 	/* Range check, don't go outside the buffer */
 	if (rp >= (ACPI_RESOURCE *)((u_int8_t *)buf->Pointer + buf->Length))
 	    return (AE_BAD_PARAMETER);
 	if (rp->Type == ACPI_RESOURCE_TYPE_END_TAG || rp->Length == 0)
 	    break;
 	rp = ACPI_NEXT_RESOURCE(rp);
     }
 
     /*
      * Check the size of the buffer and expand if required.
      *
      * Required size is:
      *	size of existing resources before terminator +
      *	size of new resource and header +
      *	size of terminator.
      *
      * Note that this loop should really only run once, unless
      * for some reason we are stuffing a *really* huge resource.
      */
     while ((((u_int8_t *)rp - (u_int8_t *)buf->Pointer) + 
 	    res->Length + ACPI_RS_SIZE_NO_DATA +
 	    ACPI_RS_SIZE_MIN) >= buf->Length) {
 	/* Double the buffer, copy the old contents and rebase rp. */
 	if ((newp = AcpiOsAllocate(buf->Length * 2)) == NULL)
 	    return (AE_NO_MEMORY);
 	bcopy(buf->Pointer, newp, buf->Length);
 	rp = (ACPI_RESOURCE *)((u_int8_t *)newp +
 			       ((u_int8_t *)rp - (u_int8_t *)buf->Pointer));
 	AcpiOsFree(buf->Pointer);
 	buf->Pointer = newp;
 	buf->Length += buf->Length;
     }
 
     /* Insert the new resource. */
     bcopy(res, rp, res->Length + ACPI_RS_SIZE_NO_DATA);
 
     /* And add the terminator. */
     rp = ACPI_NEXT_RESOURCE(rp);
     rp->Type = ACPI_RESOURCE_TYPE_END_TAG;
     rp->Length = ACPI_RS_SIZE_MIN;
 
     return (AE_OK);
 }
 
 /*
  * Select the interrupt model by evaluating \_PIC on the root object with
  * `model' as its argument.  Returns the status of that evaluation.
  */
 ACPI_STATUS
 acpi_SetIntrModel(int model)
 {
     ACPI_STATUS	status;
 
     status = acpi_SetInteger(ACPI_ROOT_OBJECT, "_PIC", model);
     return (status);
 }
 
 /*
  * Invoke `handler' on each subtable in the range [first, end), advancing
  * by the Length recorded in each subtable header.  The caller supplies the
  * first subtable and a pointer to the end of the enclosing table; this
  * suits tables built from subtable entries, such as MADT and SRAT.
  *
  * The walk stops early at any entry whose Length is smaller than a
  * subtable header, since such an entry could never advance the cursor
  * safely.
  */
 void
 acpi_walk_subtables(void *first, void *end, acpi_subtable_handler *handler,
     void *arg)
 {
     ACPI_SUBTABLE_HEADER *cur;
 
     cur = first;
     while ((void *)cur < end) {
 	/* A bogus entry would loop forever; give up instead. */
 	if (cur->Length < sizeof(ACPI_SUBTABLE_HEADER))
 	    break;
 
 	handler(cur, arg);
 	cur = ACPI_ADD_PTR(ACPI_SUBTABLE_HEADER, cur, cur->Length);
     }
 }
 
 /*
  * DEPRECATED.  This interface has serious deficiencies and will be
  * removed.
  *
  * Enter the sleep state at once, without asking userland first.  In the
  * old model, acpiconf(8) ran rc.suspend and rc.resume so we don't have to
  * notify devd(8) to do this.  A deprecation warning is printed the first
  * time this is called.
  */
 ACPI_STATUS
 acpi_SetSleepState(struct acpi_softc *sc, int state)
 {
     static int warned;
 
     if (warned == 0) {
 	device_printf(sc->acpi_dev,
 "warning: acpi_SetSleepState() deprecated, need to update your software\n");
 	warned = 1;
     }
 
     return (acpi_EnterSleepState(sc, state));
 }
 
 #if defined(__amd64__) || defined(__i386__)
 static void
 acpi_sleep_force_task(void *context)
 {
     struct acpi_softc *sc = (struct acpi_softc *)context;
 
     if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
 	device_printf(sc->acpi_dev, "force sleep state S%d failed\n",
 	    sc->acpi_next_sstate);
 }
 
 static void
 acpi_sleep_force(void *arg)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
 
     device_printf(sc->acpi_dev,
 	"suspend request timed out, forcing sleep now\n");
     /*
      * XXX Suspending from callout causes freezes in DEVICE_SUSPEND().
      * Suspend from acpi_task thread instead.
      */
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_sleep_force_task, sc)))
 	device_printf(sc->acpi_dev, "AcpiOsExecute() for sleeping failed\n");
 }
 #endif
 
 /*
  * Request that the system enter the given suspend state.  All /dev/apm
  * devices and devd(8) will be notified.  Userland then has a chance to
  * save state and acknowledge the request.  The system sleeps once all
  * acks are in.
  *
  * Returns 0 if the request was accepted (or one is already pending),
  * EINVAL for a state outside S1..ACPI_S_STATES_MAX, EOPNOTSUPP if the state
  * is not marked in acpi_sleep_states[] or the platform is not amd64/i386,
  * and ENXIO if an immediate acpi_EnterSleepState() call fails.
  */
 int
 acpi_ReqSleepState(struct acpi_softc *sc, int state)
 {
 #if defined(__amd64__) || defined(__i386__)
     struct apm_clone_data *clone;
     ACPI_STATUS status;
 
     if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
 	return (EINVAL);
     if (!acpi_sleep_states[state])
 	return (EOPNOTSUPP);
 
     /* If a suspend request is already in progress, just return. */
     if (sc->acpi_next_sstate != 0) {
 	return (0);
     }
 
     /* Wait until sleep is enabled. */
     while (sc->acpi_sleep_disabled) {
 	AcpiOsSleep(1000);
     }
 
     ACPI_LOCK(acpi);
 
     /* Record the pending state; other paths test this for "in progress". */
     sc->acpi_next_sstate = state;
 
     /* S5 (soft-off) should be entered directly with no waiting. */
     if (state == ACPI_STATE_S5) {
     	ACPI_UNLOCK(acpi);
 	status = acpi_EnterSleepState(sc, state);
 	return (ACPI_SUCCESS(status) ? 0 : ENXIO);
     }
 
     /*
      * Reset each apm device's vote and wake up its readers; clones flagged
      * ACPI_EVF_DEVD are skipped here.
      */
     STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
 	clone->notify_status = APM_EV_NONE;
 	if ((clone->flags & ACPI_EVF_DEVD) == 0) {
 	    selwakeuppri(&clone->sel_read, PZERO);
 	    KNOTE_LOCKED(&clone->sel_read.si_note, 0);
 	}
     }
 
     /* If devd(8) is not running, immediately enter the sleep state. */
     if (!devctl_process_running()) {
 	ACPI_UNLOCK(acpi);
 	status = acpi_EnterSleepState(sc, state);
 	return (ACPI_SUCCESS(status) ? 0 : ENXIO);
     }
 
     /*
      * Set a timeout to fire if userland doesn't ack the suspend request
      * in time.  This way we still eventually go to sleep if we were
      * overheating or running low on battery, even if userland is hung.
      * We cancel this timeout once all userland acks are in or the
      * suspend request is aborted.
      */
     callout_reset(&sc->susp_force_to, 10 * hz, acpi_sleep_force, sc);
     ACPI_UNLOCK(acpi);
 
     /* Now notify devd(8) also. */
     acpi_UserNotify("Suspend", ACPI_ROOT_OBJECT, state);
 
     return (0);
 #else
     /* This platform does not support acpi suspend/resume. */
     return (EOPNOTSUPP);
 #endif
 }
 
 /*
  * Record one listener's answer to a pending suspend request.
  *
  * A non-zero `error' vetoes the suspend: the pending state is cleared, the
  * force timeout is stopped and 0 is returned.  Otherwise the caller's clone
  * is marked as having acked, and if every writable clone has now acked the
  * system is suspended.  Returns ENXIO if nothing is pending, ENODEV if the
  * suspend itself fails, and EOPNOTSUPP on platforms other than amd64/i386.
  */
 int
 acpi_AckSleepState(struct apm_clone_data *clone, int error)
 {
 #if defined(__amd64__) || defined(__i386__)
     struct acpi_softc *sc;
     struct apm_clone_data *cd;
     int all_acked;
 
     ACPI_LOCK(acpi);
     sc = clone->acpi_sc;
 
     /* Without a pending sleep state there is nothing to acknowledge. */
     if (sc->acpi_next_sstate == 0) {
     	ACPI_UNLOCK(acpi);
 	return (ENXIO);
     }
 
     /* The caller is vetoing the suspend. */
     if (error != 0) {
 	sc->acpi_next_sstate = 0;
 	callout_stop(&sc->susp_force_to);
 	device_printf(sc->acpi_dev,
 	    "listener on %s cancelled the pending suspend\n",
 	    devtoname(clone->cdev));
     	ACPI_UNLOCK(acpi);
 	return (0);
     }
 
     /*
      * Register this vote, then look for any writable clone that has not
      * acked yet.  Read-only clones cannot ack and so are not counted.
      */
     clone->notify_status = APM_EV_ACKED;
     all_acked = TRUE;
     STAILQ_FOREACH(cd, &sc->apm_cdevs, entries) {
 	if ((cd->flags & ACPI_EVF_WRITE) == 0)
 	    continue;
 	if (cd->notify_status != APM_EV_ACKED) {
 	    all_acked = FALSE;
 	    break;
 	}
     }
 
     /* Still waiting on somebody. */
     if (!all_acked) {
 	ACPI_UNLOCK(acpi);
 	return (0);
     }
 
     /* Everyone voted "yes": cancel the timeout and suspend now. */
     callout_stop(&sc->susp_force_to);
     ACPI_UNLOCK(acpi);
     if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
 	return (ENODEV);
     return (0);
 #else
     /* This platform does not support acpi suspend/resume. */
     return (EOPNOTSUPP);
 #endif
 }
 
 /*
  * Timer callback that lifts the "sleep disabled" flag.  Runs with the acpi
  * lock asserted.  While the system is not yet fully awake and running, the
  * timer is simply re-armed for another ACPI_MINIMUM_AWAKETIME seconds.
  */
 static void
 acpi_sleep_enable(void *arg)
 {
     struct acpi_softc	*sc;
 
     sc = (struct acpi_softc *)arg;
     ACPI_LOCK_ASSERT(acpi);
 
     if (AcpiGbl_SystemAwakeAndRunning) {
 	sc->acpi_sleep_disabled = FALSE;
 	return;
     }
 
     /* Not up yet; try again later. */
     callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
 }
 
 /*
  * Mark sleep as disabled.  Returns AE_OK if it was enabled beforehand and
  * AE_ERROR if it was already disabled or the system is not fully awake and
  * running.  The flag is read and set under the acpi lock.
  */
 static ACPI_STATUS
 acpi_sleep_disable(struct acpi_softc *sc)
 {
     ACPI_STATUS		rv;
 
     /* Refuse while the system is still coming up. */
     if (!AcpiGbl_SystemAwakeAndRunning)
 	return (AE_ERROR);
 
     ACPI_LOCK(acpi);
     if (sc->acpi_sleep_disabled)
 	rv = AE_ERROR;
     else
 	rv = AE_OK;
     sc->acpi_sleep_disabled = TRUE;
     ACPI_UNLOCK(acpi);
 
     return (rv);
 }
 
 /*
  * Progress markers for acpi_EnterSleepState().  The values are ordered so
  * that its backout code can compare with ">=" to decide how much to undo.
  */
 enum acpi_sleep_state {
     ACPI_SS_NONE,		/* nothing done yet */
     ACPI_SS_GPE_SET,		/* acpi_wake_prep_walk() has run */
     ACPI_SS_DEV_SUSPEND,	/* DEVICE_SUSPEND(root_bus) succeeded */
     ACPI_SS_SLP_PREP,		/* AcpiEnterSleepStatePrep() succeeded */
     ACPI_SS_SLEPT,		/* the sleep attempt completed */
 };
 
 /*
  * Enter the desired system sleep state.
  *
  * Currently we support S1-S5 but S4 is only S4BIOS
  *
  * Returns AE_BAD_PARAMETER for a state outside S1..ACPI_S_STATES_MAX,
  * AE_SUPPORT if acpi_sleep_states[state] is clear, and the failure from
  * acpi_sleep_disable() if a suspend is refused.  For S5 the function only
  * initiates shutdown_nice(RB_POWEROFF) and returns AE_OK.  Otherwise the
  * value of `status' at the time the common backout/resume path is reached
  * is returned.
  *
  * slp_state tracks how far the suspend got so that the backout path undoes
  * exactly the steps that were performed; the same path is used on success.
  */
 static ACPI_STATUS
 acpi_EnterSleepState(struct acpi_softc *sc, int state)
 {
     register_t intr;
     ACPI_STATUS status;
     ACPI_EVENT_STATUS power_button_status;
     enum acpi_sleep_state slp_state;
     int sleep_result;
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
 	return_ACPI_STATUS (AE_BAD_PARAMETER);
     if (!acpi_sleep_states[state]) {
 	device_printf(sc->acpi_dev, "Sleep state S%d not supported by BIOS\n",
 	    state);
 	return (AE_SUPPORT);
     }
 
     /* Re-entry once we're suspending is not allowed. */
     status = acpi_sleep_disable(sc);
     if (ACPI_FAILURE(status)) {
 	device_printf(sc->acpi_dev,
 	    "suspend request ignored (not ready yet)\n");
 	return (status);
     }
 
     if (state == ACPI_STATE_S5) {
 	/*
 	 * Shut down cleanly and power off.  This will call us back through the
 	 * shutdown handlers.
 	 */
 	shutdown_nice(RB_POWEROFF);
 	return_ACPI_STATUS (AE_OK);
     }
 
     EVENTHANDLER_INVOKE(power_suspend_early);
     stop_all_proc();
     EVENTHANDLER_INVOKE(power_suspend);
 
     /* Bind to CPU 0 for the duration of the suspend; undone after backout. */
     if (smp_started) {
 	thread_lock(curthread);
 	sched_bind(curthread, 0);
 	thread_unlock(curthread);
     }
 
     /*
      * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
      * drivers need this.
      */
     mtx_lock(&Giant);
 
     slp_state = ACPI_SS_NONE;
 
     sc->acpi_sstate = state;
 
     /* Enable any GPEs as appropriate and requested by the user. */
     acpi_wake_prep_walk(state);
     slp_state = ACPI_SS_GPE_SET;
 
     /*
      * Inform all devices that we are going to sleep.  If at least one
      * device fails, DEVICE_SUSPEND() automatically resumes the tree.
      *
      * XXX Note that a better two-pass approach with a 'veto' pass
      * followed by a "real thing" pass would be better, but the current
      * bus interface does not provide for this.
      */
     if (DEVICE_SUSPEND(root_bus) != 0) {
 	device_printf(sc->acpi_dev, "device_suspend failed\n");
 	goto backout;
     }
     slp_state = ACPI_SS_DEV_SUSPEND;
 
     /* If testing device suspend only, back out of everything here. */
     if (acpi_susp_bounce)
 	goto backout;
 
     status = AcpiEnterSleepStatePrep(state);
     if (ACPI_FAILURE(status)) {
 	device_printf(sc->acpi_dev, "AcpiEnterSleepStatePrep failed - %s\n",
 		      AcpiFormatException(status));
 	goto backout;
     }
     slp_state = ACPI_SS_SLP_PREP;
 
     /* acpi_sleep_delay is scaled by 1000000 for DELAY(). */
     if (sc->acpi_sleep_delay > 0)
 	DELAY(sc->acpi_sleep_delay * 1000000);
 
     intr = intr_disable();
     if (state != ACPI_STATE_S1) {
 	/* Everything except S1 goes through the machine-dependent path. */
 	sleep_result = acpi_sleep_machdep(sc, state);
 	acpi_wakeup_machdep(sc, state, sleep_result, 0);
 
 	/*
 	 * XXX According to ACPI specification SCI_EN bit should be restored
 	 * by ACPI platform (BIOS, firmware) to its pre-sleep state.
 	 * Unfortunately some BIOSes fail to do that and that leads to
 	 * unexpected and serious consequences during wake up like a system
 	 * getting stuck in SMI handlers.
 	 * This hack is picked up from Linux, which claims that it follows
 	 * Windows behavior.
 	 */
 	if (sleep_result == 1 && state != ACPI_STATE_S4)
 	    AcpiWriteBitRegister(ACPI_BITREG_SCI_ENABLE, ACPI_ENABLE_EVENT);
 
 	AcpiLeaveSleepStatePrep(state);
 
 	if (sleep_result == 1 && state == ACPI_STATE_S3) {
 	    /*
 	     * Prevent mis-interpretation of the wakeup by power button
 	     * as a request for power off.
 	     * Ideally we should post an appropriate wakeup event,
 	     * perhaps using acpi_event_power_button_wake or alike.
 	     *
 	     * Clearing of power button status after wakeup is mandated
 	     * by ACPI specification in section "Fixed Power Button".
 	     *
 	     * XXX As of ACPICA 20121114 AcpiGetEventStatus provides
 	     * status as 0/1 corresponding to inactive/active despite
 	     * its type being ACPI_EVENT_STATUS.  In other words,
 	     * we should not test for ACPI_EVENT_FLAG_SET for time being.
 	     */
 	    if (ACPI_SUCCESS(AcpiGetEventStatus(ACPI_EVENT_POWER_BUTTON,
 		&power_button_status)) && power_button_status != 0) {
 		AcpiClearEvent(ACPI_EVENT_POWER_BUTTON);
 		device_printf(sc->acpi_dev,
 		    "cleared fixed power button status\n");
 	    }
 	}
 
 	intr_restore(intr);
 
 	/* Call acpi_wakeup_machdep() again with interrupts enabled. */
 	acpi_wakeup_machdep(sc, state, sleep_result, 1);
 
 	/* A sleep_result of -1 is treated as failure: skip to backout. */
 	if (sleep_result == -1)
 		goto backout;
 
 	/* Re-enable ACPI hardware on wakeup from sleep state 4. */
 	if (state == ACPI_STATE_S4)
 	    AcpiEnable();
     } else {
 	/* S1 is entered directly through ACPICA. */
 	status = AcpiEnterSleepState(state);
 	AcpiLeaveSleepStatePrep(state);
 	intr_restore(intr);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n",
 			  AcpiFormatException(status));
 	    goto backout;
 	}
     }
     slp_state = ACPI_SS_SLEPT;
 
     /*
      * Back out state according to how far along we got in the suspend
      * process.  This handles both the error and success cases.
      */
 backout:
     if (slp_state >= ACPI_SS_GPE_SET) {
 	acpi_wake_prep_walk(state);
 	sc->acpi_sstate = ACPI_STATE_S0;
     }
     if (slp_state >= ACPI_SS_DEV_SUSPEND)
 	DEVICE_RESUME(root_bus);
     if (slp_state >= ACPI_SS_SLP_PREP)
 	AcpiLeaveSleepState(state);
     if (slp_state >= ACPI_SS_SLEPT) {
 	acpi_resync_clock(sc);
 	acpi_enable_fixed_events(sc);
     }
     /* No suspend request is pending any more. */
     sc->acpi_next_sstate = 0;
 
     mtx_unlock(&Giant);
 
     if (smp_started) {
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
     }
 
     resume_all_proc();
 
     EVENTHANDLER_INVOKE(power_resume);
 
     /* Allow another sleep request after a while. */
     callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
 
     /* Run /etc/rc.resume after we are back. */
     if (devctl_process_running())
 	acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
 
     return_ACPI_STATUS (status);
 }
 
 /*
  * On amd64, and only when acpi_reset_clock is set, read the timecounter
  * twice to warm it up and then reset the system clock from
  * time_second + sc->acpi_sleep_delay.  A no-op elsewhere.
  */
 static void
 acpi_resync_clock(struct acpi_softc *sc)
 {
 #ifdef __amd64__
     if (acpi_reset_clock) {
 	/* Warm up timecounter again and reset system clock. */
 	(void)timecounter->tc_get_timecount(timecounter);
 	(void)timecounter->tc_get_timecount(timecounter);
 	inittodr(time_second + sc->acpi_sleep_delay);
     }
 #endif
 }
 
 /*
  * Enable (enable != 0) or disable the wake GPE of a device and mirror the
  * result in the device's ACPI_FLAG_WAKE_ENABLED flag.
  *
  * Returns 0 on success and ENXIO if the device's _PRW cannot be parsed or
  * the GPE wake mask cannot be changed; the flag is left alone on failure.
  */
 int
 acpi_wake_set_enable(device_t dev, int enable)
 {
     struct acpi_prw_data prw;
     int cur_flags;
 
     /* The device must be wake-capable; this also yields its GPE. */
     if (acpi_parse_prw(acpi_get_handle(dev), &prw) != 0)
 	return (ENXIO);
 
     cur_flags = acpi_get_flags(dev);
 
     if (enable != 0) {
 	if (ACPI_FAILURE(AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit,
 	    ACPI_GPE_ENABLE))) {
 	    device_printf(dev, "enable wake failed\n");
 	    return (ENXIO);
 	}
 	acpi_set_flags(dev, cur_flags | ACPI_FLAG_WAKE_ENABLED);
 	return (0);
     }
 
     if (ACPI_FAILURE(AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit,
 	ACPI_GPE_DISABLE))) {
 	device_printf(dev, "disable wake failed\n");
 	return (ENXIO);
     }
     acpi_set_flags(dev, cur_flags & ~ACPI_FLAG_WAKE_ENABLED);
     return (0);
 }
 
 /*
  * Prepare one wake-capable object for entering sleep state `sstate'.
  *
  * If `sstate' is deeper than the lowest state the device can wake from,
  * its GPE wake mask is disabled.  Otherwise, if a device is attached to the
  * handle and the user enabled wake on it, the required power resources are
  * turned on and _PSW is set to 1.  Returns ENXIO if the object has no
  * usable _PRW, 0 otherwise.
  */
 static int
 acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate)
 {
     struct acpi_prw_data prw;
     device_t dev;
 
     /* Only wake-capable devices are of interest; fetch the GPE too. */
     if (acpi_parse_prw(handle, &prw) != 0)
 	return (ENXIO);
     dev = acpi_get_device(handle);
 
     /*
      * The destination sleep state must be less than (i.e., higher power)
      * or equal to the value specified by _PRW.  If it is not, this GPE
      * cannot be used for the next sleep state, so disable it.
      */
     if (sstate > prw.lowest_wake) {
 	AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit, ACPI_GPE_DISABLE);
 	if (bootverbose)
 	    device_printf(dev, "wake_prep disabled wake for %s (S%d)\n",
 		acpi_name(handle), sstate);
 	return (0);
     }
 
     /* Nothing more to do unless the user asked for wake on this device. */
     if (dev == NULL)
 	return (0);
     if ((acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) == 0)
 	return (0);
 
     /* Turn on any required power resources and set _PSW. */
     acpi_pwr_wake_enable(handle, 1);
     acpi_SetInteger(handle, "_PSW", 1);
     if (bootverbose)
 	device_printf(dev, "wake_prep enabled for %s (S%d)\n",
 	    acpi_name(handle), sstate);
 
     return (0);
 }
 
 /*
  * Undo the sleep preparation of one wake-capable object after resuming
  * from sleep state `sstate'.
  *
  * Does nothing unless a device is attached to the handle and the user
  * enabled wake on it.  Returns ENXIO if the object has no usable _PRW,
  * 0 otherwise.
  */
 static int
 acpi_wake_run_prep(ACPI_HANDLE handle, int sstate)
 {
     struct acpi_prw_data prw;
     device_t dev;
 
     /* Only wake-capable devices are of interest; fetch the GPE too. */
     if (acpi_parse_prw(handle, &prw) != 0)
 	return (ENXIO);
 
     /* Skip devices the user did not enable for wake. */
     dev = acpi_get_device(handle);
     if (dev == NULL)
 	return (0);
     if ((acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) == 0)
 	return (0);
 
     if (sstate <= prw.lowest_wake) {
 	/*
 	 * The GPE was enabled for the previous sleep state: clear _PSW
 	 * and turn off any power resources it used.
 	 */
 	acpi_SetInteger(handle, "_PSW", 0);
 	acpi_pwr_wake_enable(handle, 0);
 	if (bootverbose)
 	    device_printf(dev, "run_prep cleaned up for %s\n",
 		acpi_name(handle));
 	return (0);
     }
 
     /*
      * The GPE could not be enabled for the previous sleep state and was
      * disabled before going to sleep, so re-enable it.
      */
     AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit, ACPI_GPE_ENABLE);
     if (bootverbose)
 	device_printf(dev, "run_prep re-enabled %s\n", acpi_name(handle));
 
     return (0);
 }
 
 /*
  * AcpiWalkNamespace() callback.  `context' points at the sleep state as an
  * int.  While the system is awake and running we are on the way down, so
  * the sleep prep is applied; otherwise the run (wake) prep is applied.
  * Always returns AE_OK so the walk continues.
  */
 static ACPI_STATUS
 acpi_wake_prep(ACPI_HANDLE handle, UINT32 level, void *context, void **status)
 {
     int target;
 
     target = *(int *)context;
     if (!AcpiGbl_SystemAwakeAndRunning)
 	acpi_wake_run_prep(handle, target);
     else
 	acpi_wake_sleep_prep(handle, target);
 
     return (AE_OK);
 }
 
 /* Walk the tree rooted at acpi0 to prep devices for suspend/resume. */
 static int
 acpi_wake_prep_walk(int sstate)
 {
     ACPI_HANDLE sb_handle;
 
     if (ACPI_SUCCESS(AcpiGetHandle(ACPI_ROOT_OBJECT, "\\_SB_", &sb_handle)))
 	AcpiWalkNamespace(ACPI_TYPE_DEVICE, sb_handle, 100,
 	    acpi_wake_prep, NULL, &sstate, NULL);
     return (0);
 }
 
 /*
  * Walk the tree rooted at acpi0 to attach per-device wake sysctls.
  *
  * Recurses into every child of `dev'.  Each attached child whose _PRW
  * evaluates successfully gets a read/write "wake" integer sysctl backed by
  * acpi_wake_set_sysctl().  Returns the device_get_children() error, or 0.
  */
 static int
 acpi_wake_sysctl_walk(device_t dev)
 {
     int error, i, numdevs;
     device_t *devlist;
     device_t child;
     ACPI_STATUS status;
 
     /*
      * Check the error first: numdevs and devlist are only meaningful when
      * device_get_children() succeeded.
      */
     error = device_get_children(dev, &devlist, &numdevs);
     if (error != 0)
 	return (error);
     if (numdevs == 0) {
 	free(devlist, M_TEMP);
 	return (0);
     }
     for (i = 0; i < numdevs; i++) {
 	child = devlist[i];
 	acpi_wake_sysctl_walk(child);
 	if (!device_is_attached(child))
 	    continue;
 	status = AcpiEvaluateObject(acpi_get_handle(child), "_PRW", NULL, NULL);
 	if (ACPI_SUCCESS(status)) {
 	    SYSCTL_ADD_PROC(device_get_sysctl_ctx(child),
 		SYSCTL_CHILDREN(device_get_sysctl_tree(child)), OID_AUTO,
 		"wake", CTLTYPE_INT | CTLFLAG_RW, child, 0,
 		acpi_wake_set_sysctl, "I", "Device set to wake the system");
 	}
     }
     free(devlist, M_TEMP);
 
     return (0);
 }
 
 /* Enable or disable wake from userland. */
 static int
 acpi_wake_set_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int enable, error;
     device_t dev;
 
     dev = (device_t)arg1;
     enable = (acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) ? 1 : 0;
 
     error = sysctl_handle_int(oidp, &enable, 0, req);
     if (error != 0 || req->newptr == NULL)
 	return (error);
     if (enable != 0 && enable != 1)
 	return (EINVAL);
 
     return (acpi_wake_set_enable(dev, enable));
 }
 
 /*
  * Parse a device's _PRW into a structure.
  *
  * On success (return 0) *prw holds the wake GPE (gpe_handle / gpe_bit), the
  * lowest sleep state the device can wake from, and a copy of the power
  * resource entries.  Returns EINVAL for NULL arguments or a malformed
  * package, and ENOENT if _PRW cannot be evaluated or returns nothing.
  *
  * NOTE(review): the power resource objects are copied by value before the
  * evaluation buffer is freed; any pointers inside them that refer into that
  * buffer would not stay valid.  Confirm against the consumers of power_res.
  */
 int
 acpi_parse_prw(ACPI_HANDLE h, struct acpi_prw_data *prw)
 {
     ACPI_STATUS			status;
     ACPI_BUFFER			prw_buffer;
     ACPI_OBJECT			*res, *res2;
     int				error, i, power_count;
 
     if (h == NULL || prw == NULL)
 	return (EINVAL);
 
     /*
      * The _PRW object (7.2.9) is only required for devices that have the
      * ability to wake the system from a sleeping state.
      */
     error = EINVAL;
     prw_buffer.Pointer = NULL;
     prw_buffer.Length = ACPI_ALLOCATE_BUFFER;
     status = AcpiEvaluateObject(h, "_PRW", NULL, &prw_buffer);
     if (ACPI_FAILURE(status))
 	return (ENOENT);
     res = (ACPI_OBJECT *)prw_buffer.Pointer;
     if (res == NULL)
 	return (ENOENT);
     if (!ACPI_PKG_VALID(res, 2))
 	goto out;
 
     /*
      * Element 1 of the _PRW object:
      * The lowest power system sleeping state that can be entered while still
      * providing wake functionality.  The sleeping state being entered must
      * be less than (i.e., higher power) or equal to this value.
      */
     if (acpi_PkgInt32(res, 1, &prw->lowest_wake) != 0)
 	goto out;
 
     /*
      * Element 0 of the _PRW object:
      */
     switch (res->Package.Elements[0].Type) {
     case ACPI_TYPE_INTEGER:
 	/*
 	 * If the data type of this package element is numeric, then this
 	 * _PRW package element is the bit index in the GPEx_EN, in the
 	 * GPE blocks described in the FADT, of the enable bit that is
 	 * enabled for the wake event.
 	 */
 	prw->gpe_handle = NULL;
 	prw->gpe_bit = res->Package.Elements[0].Integer.Value;
 	error = 0;
 	break;
     case ACPI_TYPE_PACKAGE:
 	/*
 	 * If the data type of this package element is a package, then this
 	 * _PRW package element is itself a package containing two
 	 * elements.  The first is an object reference to the GPE Block
 	 * device that contains the GPE that will be triggered by the wake
 	 * event.  The second element is numeric and it contains the bit
 	 * index in the GPEx_EN, in the GPE Block referenced by the
 	 * first element in the package, of the enable bit that is enabled for
 	 * the wake event.
 	 *
 	 * For example, if this field is a package then it is of the form:
 	 * Package() {\_SB.PCI0.ISA.GPE, 2}
 	 */
 	res2 = &res->Package.Elements[0];
 	if (!ACPI_PKG_VALID(res2, 2))
 	    goto out;
 	prw->gpe_handle = acpi_GetReference(NULL, &res2->Package.Elements[0]);
 	if (prw->gpe_handle == NULL)
 	    goto out;
 	if (acpi_PkgInt32(res2, 1, &prw->gpe_bit) != 0)
 	    goto out;
 	error = 0;
 	break;
     default:
 	goto out;
     }
 
     /*
      * Elements 2 to N of the _PRW object are power resources.  An
      * over-long list is reported and then ignored entirely.
      */
     power_count = res->Package.Count - 2;
     if (power_count > ACPI_PRW_MAX_POWERRES) {
 	printf("ACPI device %s has too many power resources\n", acpi_name(h));
 	power_count = 0;
     }
     prw->power_res_count = power_count;
     /* Skip the two leading non-resource elements when copying. */
     for (i = 0; i < power_count; i++)
 	prw->power_res[i] = res->Package.Elements[i + 2];
 
 out:
     if (prw_buffer.Pointer != NULL)
 	AcpiOsFree(prw_buffer.Pointer);
     return (error);
 }
 
 /*
  * ACPI Event Handlers
  */
 
 /* System Event Handlers (registered by EVENTHANDLER_REGISTER) */
 
 /*
  * Handler for the system sleep event.  `arg' is the acpi softc and `state'
  * the sleep state to prepare for; the request is passed on to
  * acpi_ReqSleepState() and a failure is reported against the ACPI device.
  * ACPI_STATE_UNKNOWN is ignored.
  */
 static void
 acpi_system_eventhandler_sleep(void *arg, int state)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
     int ret;
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     /*
      * Check if button action is disabled or unknown.  Leave through
      * return_VOID so the function trace entered above is closed on this
      * path as well.
      */
     if (state == ACPI_STATE_UNKNOWN)
 	return_VOID;
 
     /* Request that the system prepare to enter the given suspend state. */
     ret = acpi_ReqSleepState(sc, state);
     if (ret != 0)
 	device_printf(sc->acpi_dev,
 	    "request to enter state S%d failed (err %d)\n", state, ret);
 
     return_VOID;
 }
 
 /*
  * Handler for the system wakeup event.  It only emits the function trace;
  * neither `arg' nor `state' is acted upon.
  */
 static void
 acpi_system_eventhandler_wakeup(void *arg, int state)
 {
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     /* Currently, nothing to do for wakeup. */
 
     return_VOID;
 }
 
 /* 
  * ACPICA Event Handlers (FixedEvent, also called from button notify handler)
  */
 /*
  * Run the acpi_sleep_event handlers.  `context' must point at an int
  * holding the state value that is handed to them.
  */
 static void
 acpi_invoke_sleep_eventhandler(void *context)
 {
 
     EVENTHANDLER_INVOKE(acpi_sleep_event, *(int *)context);
 }
 
 /*
  * Run the acpi_wakeup_event handlers.  `context' must point at an int
  * holding the state value that is handed to them.
  */
 static void
 acpi_invoke_wake_eventhandler(void *context)
 {
 
     EVENTHANDLER_INVOKE(acpi_wakeup_event, *(int *)context);
 }
 
 UINT32
 acpi_event_power_button_sleep(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_sleep_eventhandler, &sc->acpi_power_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_power_button_wake(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_wake_eventhandler, &sc->acpi_power_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_sleep_button_sleep(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_sleep_eventhandler, &sc->acpi_sleep_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_sleep_button_wake(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_wake_eventhandler, &sc->acpi_sleep_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 /*
  * XXX This static buffer is suboptimal.  There is no locking so only
  * use this for single-threaded callers.
  */
 /*
  * Return the full pathname of `handle', or "(unknown)" when the handle is
  * NULL or the name cannot be fetched.  The name lives in a function-local
  * static buffer, so every call overwrites the previous result.
  */
 char *
 acpi_name(ACPI_HANDLE handle)
 {
     static char data[256];
     ACPI_BUFFER buf;
 
     if (handle == NULL)
 	return ("(unknown)");
 
     /* Have ACPICA write the pathname straight into the static buffer. */
     buf.Pointer = data;
     buf.Length = sizeof(data);
     if (ACPI_FAILURE(AcpiGetName(handle, ACPI_FULL_PATHNAME, &buf)))
 	return ("(unknown)");
     return (data);
 }
 
 /*
  * Debugging/bug-avoidance.  Avoid trying to fetch info on various
  * parts of the namespace.
  */
 /*
  * Return 1 if `handle' is covered by the whitespace-separated list in the
  * debug.acpi.avoid kernel environment variable, 0 otherwise.  An entry
  * covers the handle when it equals the leading characters of the handle's
  * pathname (taken without its leading backslash).
  */
 int
 acpi_avoid(ACPI_HANDLE handle)
 {
     char	*env, *path, *tok;
     int		found, toklen;
 
     path = acpi_name(handle);
     if (path[0] == '\\')
 	path++;
     env = kern_getenv("debug.acpi.avoid");
     if (env == NULL)
 	return (0);
 
     /* Walk the list one word at a time until a match or the end. */
     found = 0;
     tok = env;
     while (!found) {
 	for (; *tok != 0 && isspace(*tok); tok++)
 	    ;
 	if (*tok == 0)
 	    break;
 	for (toklen = 0; tok[toklen] != 0 && !isspace(tok[toklen]); toklen++)
 	    ;
 	if (strncmp(tok, path, toklen) == 0)
 	    found = 1;
 	tok += toklen;
     }
     freeenv(env);
 
     return (found);
 }
 
 /*
  * Debugging/bug-avoidance.  Disable ACPI subsystem components.
  */
 /*
  * Return 1 if `subsys' is disabled through the debug.acpi.disabled kernel
  * environment variable, 0 otherwise.  The value "all" disables everything;
  * otherwise it is a whitespace-separated list and an entry matches when it
  * equals the leading characters of `subsys'.
  */
 int
 acpi_disabled(char *subsys)
 {
     char	*env, *tok;
     int		disabled, toklen;
 
     env = kern_getenv("debug.acpi.disabled");
     if (env == NULL)
 	return (0);
 
     /* "all" short-circuits the scan of the list below. */
     disabled = (strcmp(env, "all") == 0);
     tok = env;
     while (!disabled) {
 	for (; *tok != '\0' && isspace(*tok); tok++)
 	    ;
 	if (*tok == '\0')
 	    break;
 	for (toklen = 0; tok[toklen] != '\0' && !isspace(tok[toklen]);
 	    toklen++)
 	    ;
 	if (strncmp(tok, subsys, toklen) == 0)
 	    disabled = 1;
 	tok += toklen;
     }
     freeenv(env);
 
     return (disabled);
 }
 
 /*
  * Resolve `name' to a device when it is an absolute ACPI pathname.  `*dev'
  * is left untouched if it is already set, if the name is not absolute, or
  * if the namespace lookup fails.
  */
 static void
 acpi_lookup(void *arg, const char *name, device_t *dev)
 {
     ACPI_HANDLE handle;
     ACPI_STATUS status;
 
     if (*dev != NULL)
 	return;
 
     /*
      * Allow any handle name that is specified as an absolute path and
      * starts with '\'.  We could restrict this to \_SB and friends,
      * but see acpi_probe_children() for notes on why we scan the entire
      * namespace for devices.
      *
      * XXX: The pathname argument to AcpiGetHandle() should be fixed to
      * be const.
      */
     if (name[0] != '\\')
 	return;
     status = AcpiGetHandle(ACPI_ROOT_OBJECT, __DECONST(char *, name),
 	&handle);
     if (ACPI_SUCCESS(status))
 	*dev = acpi_get_device(handle);
 }
 
 /*
  * Control interface.
  *
  * We multiplex ioctls for all participating ACPI devices here.  Individual 
  * drivers wanting to be accessible via /dev/acpi should use the
  * register/deregister interface to make their handlers visible.
  */
 /* One registered ioctl handler; see acpi_register_ioctl(). */
 struct acpi_ioctl_hook
 {
     TAILQ_ENTRY(acpi_ioctl_hook) link;	/* linkage on acpi_ioctl_hooks */
     u_long			 cmd;	/* ioctl command this hook serves */
     acpi_ioctl_fn		 fn;	/* handler invoked for that command */
     void			 *arg;	/* passed to fn as its last argument */
 };
 
 /* List of registered hooks; initialized by the first registration. */
 static TAILQ_HEAD(,acpi_ioctl_hook)	acpi_ioctl_hooks;
 /* Non-zero once acpi_ioctl_hooks has been through TAILQ_INIT(). */
 static int				acpi_ioctl_hooks_initted;
 
 /*
  * Register `fn' as the handler for ioctl `cmd'; `arg' is handed back to
  * the handler on every call.  Returns 0 on success or ENOMEM if the hook
  * record cannot be allocated.
  */
 int
 acpi_register_ioctl(u_long cmd, acpi_ioctl_fn fn, void *arg)
 {
     struct acpi_ioctl_hook	*hook;
 
     /* Fill in the record before taking the lock. */
     hook = malloc(sizeof(*hook), M_ACPIDEV, M_NOWAIT);
     if (hook == NULL)
 	return (ENOMEM);
     hook->arg = arg;
     hook->fn = fn;
     hook->cmd = cmd;
 
     ACPI_LOCK(acpi);
     /* The list head is set up lazily by the first registration. */
     if (!acpi_ioctl_hooks_initted) {
 	TAILQ_INIT(&acpi_ioctl_hooks);
 	acpi_ioctl_hooks_initted = 1;
     }
     TAILQ_INSERT_TAIL(&acpi_ioctl_hooks, hook, link);
     ACPI_UNLOCK(acpi);
 
     return (0);
 }
 
 /*
  * Remove and free the first registered hook whose command and handler
  * both match `cmd' and `fn'.  Nothing happens if there is no such hook.
  */
 void
 acpi_deregister_ioctl(u_long cmd, acpi_ioctl_fn fn)
 {
     struct acpi_ioctl_hook	*hook;
 
     ACPI_LOCK(acpi);
     TAILQ_FOREACH(hook, &acpi_ioctl_hooks, link) {
 	if (hook->cmd != cmd || hook->fn != fn)
 	    continue;
 	/* Leave the loop right away; hook is gone after free(). */
 	TAILQ_REMOVE(&acpi_ioctl_hooks, hook, link);
 	free(hook, M_ACPIDEV);
 	break;
     }
     ACPI_UNLOCK(acpi);
 }
 
 /*
  * Open entry point (presumably for the /dev/acpi control device; the
  * cdevsw is not visible here).  There is no per-open state, so it always
  * succeeds.
  */
 static int
 acpiopen(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
     return (0);
 }
 
 /*
  * Close entry point matching acpiopen().  Nothing needs to be released,
  * so it always succeeds.
  */
 static int
 acpiclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
     return (0);
 }
 
 /*
  * ioctl entry point.  A command claimed by a registered hook is passed to
  * that hook and its result returned.  Anything else is treated as a core
  * sleep-state ioctl, which requires the descriptor to be open for writing.
  * Returns 0 or an errno value.
  */
 static int
 acpiioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
     struct acpi_softc		*sc;
     struct acpi_ioctl_hook	*hp;
     int				error, state;
 
     error = 0;
     hp = NULL;
     sc = dev->si_drv1;
 
     /*
      * Scan the list of registered ioctls, looking for handlers.
      */
     ACPI_LOCK(acpi);
     if (acpi_ioctl_hooks_initted)
 	TAILQ_FOREACH(hp, &acpi_ioctl_hooks, link) {
 	    if (hp->cmd == cmd)
 		break;
 	}
     ACPI_UNLOCK(acpi);
     /*
      * NOTE(review): hp is dereferenced after the lock is dropped;
      * presumably hooks are never deregistered while an ioctl is in
      * flight -- confirm.
      */
     if (hp)
 	return (hp->fn(cmd, addr, hp->arg));
 
     /*
      * Core ioctls are not permitted for non-writable user.
      * Currently, other ioctls just fetch information.
      * Not changing system behavior.
      */
     if ((flag & FWRITE) == 0)
 	return (EPERM);
 
     /* Core system ioctls. */
     switch (cmd) {
     case ACPIIO_REQSLPSTATE:
 	/* Any state except S5 is forwarded as a sleep request. */
 	state = *(int *)addr;
 	if (state != ACPI_STATE_S5)
 	    return (acpi_ReqSleepState(sc, state));
 	device_printf(sc->acpi_dev, "power off via acpi ioctl not supported\n");
 	error = EOPNOTSUPP;
 	break;
     case ACPIIO_ACKSLPSTATE:
 	/* error first carries the caller's value, then the result. */
 	error = *(int *)addr;
 	error = acpi_AckSleepState(sc->acpi_clone, error);
 	break;
     case ACPIIO_SETSLPSTATE:	/* DEPRECATED */
 	/* Range-check before indexing acpi_sleep_states[]. */
 	state = *(int *)addr;
 	if (state < ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
 	    return (EINVAL);
 	if (!acpi_sleep_states[state])
 	    return (EOPNOTSUPP);
 	if (ACPI_FAILURE(acpi_SetSleepState(sc, state)))
 	    error = ENXIO;
 	break;
     default:
 	error = ENXIO;
 	break;
     }
 
     return (error);
 }
 
 /*
  * Convert a sleep state name to its number.  "S0" through "S5" (the 'S'
  * in either case, exactly one digit) give the matching state, "NONE" in
  * any case gives ACPI_STATE_UNKNOWN, and anything else gives -1.
  */
 static int
 acpi_sname2sstate(const char *sname)
 {
     int sstate;
 
     if (toupper(sname[0]) != 'S') {
 	if (strcasecmp(sname, "NONE") == 0)
 	    return (ACPI_STATE_UNKNOWN);
 	return (-1);
     }
 
     /* sname[2] is only inspected once sname[1] is a valid state digit. */
     sstate = sname[1] - '0';
     if (sstate < ACPI_STATE_S0 || sstate > ACPI_STATE_S5 ||
 	sname[2] != '\0')
 	return (-1);
     return (sstate);
 }
 
 /*
  * Convert a sleep state number to its name: "S0" through "S5" for the
  * corresponding states, "NONE" for ACPI_STATE_UNKNOWN, and NULL for any
  * other value.
  */
 static const char *
 acpi_sstate2sname(int sstate)
 {
     static const char *snames[] = { "S0", "S1", "S2", "S3", "S4", "S5" };
 
     if (sstate >= ACPI_STATE_S0 && sstate <= ACPI_STATE_S5)
 	return (snames[sstate]);
     return (sstate == ACPI_STATE_UNKNOWN ? "NONE" : NULL);
 }
 
 static int
 acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int error;
     struct sbuf sb;
     UINT8 state;
 
     sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
     for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
 	if (acpi_sleep_states[state])
 	    sbuf_printf(&sb, "%s ", acpi_sstate2sname(state));
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
     sbuf_delete(&sb);
     return (error);
 }
 
 static int
 acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
 {
     char sleep_state[10];
     int error, new_state, old_state;
 
     old_state = *(int *)oidp->oid_arg1;
     strlcpy(sleep_state, acpi_sstate2sname(old_state), sizeof(sleep_state));
     error = sysctl_handle_string(oidp, sleep_state, sizeof(sleep_state), req);
     if (error == 0 && req->newptr != NULL) {
 	new_state = acpi_sname2sstate(sleep_state);
 	if (new_state < ACPI_STATE_S1)
 	    return (EINVAL);
 	if (new_state < ACPI_S_STATE_COUNT && !acpi_sleep_states[new_state])
 	    return (EOPNOTSUPP);
 	if (new_state != old_state)
 	    *(int *)oidp->oid_arg1 = new_state;
     }
     return (error);
 }
 
 /*
  * Inform devctl(4) when we receive a Notify.  The event is sent for
  * system "ACPI" and the given subsystem, with the handle's pathname and
  * "notify=0x<value>" as data.  Nothing is sent when `subsystem' is NULL
  * or the pathname cannot be obtained.
  */
 void
 acpi_UserNotify(const char *subsystem, ACPI_HANDLE h, uint8_t notify)
 {
     ACPI_BUFFER		path;
     char		event[16];
 
     if (subsystem == NULL)
 	return;
 
     /* Let ACPICA allocate the pathname buffer; it is freed below. */
     path.Length = ACPI_ALLOCATE_BUFFER;
     path.Pointer = NULL;
     if (ACPI_FAILURE(AcpiNsHandleToPathname(h, &path)))
 	return;
 
     snprintf(event, sizeof(event), "notify=0x%02x", notify);
     devctl_notify("ACPI", subsystem, path.Pointer, event);
     AcpiOsFree(path.Pointer);
 }
 
 #ifdef ACPI_DEBUG
 /*
  * Support for parsing debug options from the kernel environment.
  *
  * Bits may be set in the AcpiDbgLayer and AcpiDbgLevel debug registers
  * by specifying the names of the bits in the debug.acpi.layer and
  * debug.acpi.level environment variables.  Bits may be unset by 
  * prefixing the bit name with !.
  */
 /* Maps the textual name of a debug bit (or bit group) to its mask. */
 struct debugtag
 {
     char	*name;		/* name as written in the environment */
     UINT32	value;		/* bits set or cleared for that name */
 };
 
 /*
  * Names accepted in debug.acpi.layer and the AcpiDbgLayer bits they
  * select.  The table ends with a NULL name.
  */
 static struct debugtag	dbg_layer[] = {
     {"ACPI_UTILITIES",		ACPI_UTILITIES},
     {"ACPI_HARDWARE",		ACPI_HARDWARE},
     {"ACPI_EVENTS",		ACPI_EVENTS},
     {"ACPI_TABLES",		ACPI_TABLES},
     {"ACPI_NAMESPACE",		ACPI_NAMESPACE},
     {"ACPI_PARSER",		ACPI_PARSER},
     {"ACPI_DISPATCHER",		ACPI_DISPATCHER},
     {"ACPI_EXECUTER",		ACPI_EXECUTER},
     {"ACPI_RESOURCES",		ACPI_RESOURCES},
     {"ACPI_CA_DEBUGGER",	ACPI_CA_DEBUGGER},
     {"ACPI_OS_SERVICES",	ACPI_OS_SERVICES},
     {"ACPI_CA_DISASSEMBLER",	ACPI_CA_DISASSEMBLER},
     {"ACPI_ALL_COMPONENTS",	ACPI_ALL_COMPONENTS},
 
     {"ACPI_AC_ADAPTER",		ACPI_AC_ADAPTER},
     {"ACPI_BATTERY",		ACPI_BATTERY},
     {"ACPI_BUS",		ACPI_BUS},
     {"ACPI_BUTTON",		ACPI_BUTTON},
     {"ACPI_EC", 		ACPI_EC},
     {"ACPI_FAN",		ACPI_FAN},
     {"ACPI_POWERRES",		ACPI_POWERRES},
     {"ACPI_PROCESSOR",		ACPI_PROCESSOR},
     {"ACPI_THERMAL",		ACPI_THERMAL},
     {"ACPI_TIMER",		ACPI_TIMER},
     {"ACPI_ALL_DRIVERS",	ACPI_ALL_DRIVERS},
     {NULL, 0}
 };
 
 /*
  * Names accepted in debug.acpi.level and the AcpiDbgLevel bits they
  * select.  The table ends with a NULL name.
  */
 static struct debugtag dbg_level[] = {
     {"ACPI_LV_INIT",		ACPI_LV_INIT},
     {"ACPI_LV_DEBUG_OBJECT",	ACPI_LV_DEBUG_OBJECT},
     {"ACPI_LV_INFO",		ACPI_LV_INFO},
     {"ACPI_LV_REPAIR",		ACPI_LV_REPAIR},
     {"ACPI_LV_ALL_EXCEPTIONS",	ACPI_LV_ALL_EXCEPTIONS},
 
     /* Trace verbosity level 1 [Standard Trace Level] */
     {"ACPI_LV_INIT_NAMES",	ACPI_LV_INIT_NAMES},
     {"ACPI_LV_PARSE",		ACPI_LV_PARSE},
     {"ACPI_LV_LOAD",		ACPI_LV_LOAD},
     {"ACPI_LV_DISPATCH",	ACPI_LV_DISPATCH},
     {"ACPI_LV_EXEC",		ACPI_LV_EXEC},
     {"ACPI_LV_NAMES",		ACPI_LV_NAMES},
     {"ACPI_LV_OPREGION",	ACPI_LV_OPREGION},
     {"ACPI_LV_BFIELD",		ACPI_LV_BFIELD},
     {"ACPI_LV_TABLES",		ACPI_LV_TABLES},
     {"ACPI_LV_VALUES",		ACPI_LV_VALUES},
     {"ACPI_LV_OBJECTS",		ACPI_LV_OBJECTS},
     {"ACPI_LV_RESOURCES",	ACPI_LV_RESOURCES},
     {"ACPI_LV_USER_REQUESTS",	ACPI_LV_USER_REQUESTS},
     {"ACPI_LV_PACKAGE",		ACPI_LV_PACKAGE},
     {"ACPI_LV_VERBOSITY1",	ACPI_LV_VERBOSITY1},
 
     /* Trace verbosity level 2 [Function tracing and memory allocation] */
     {"ACPI_LV_ALLOCATIONS",	ACPI_LV_ALLOCATIONS},
     {"ACPI_LV_FUNCTIONS",	ACPI_LV_FUNCTIONS},
     {"ACPI_LV_OPTIMIZATIONS",	ACPI_LV_OPTIMIZATIONS},
     {"ACPI_LV_VERBOSITY2",	ACPI_LV_VERBOSITY2},
     {"ACPI_LV_ALL",		ACPI_LV_ALL},
 
     /* Trace verbosity level 3 [Threading, I/O, and Interrupts] */
     {"ACPI_LV_MUTEX",		ACPI_LV_MUTEX},
     {"ACPI_LV_THREADS",		ACPI_LV_THREADS},
     {"ACPI_LV_IO",		ACPI_LV_IO},
     {"ACPI_LV_INTERRUPTS",	ACPI_LV_INTERRUPTS},
     {"ACPI_LV_VERBOSITY3",	ACPI_LV_VERBOSITY3},
 
     /* Exceptionally verbose output -- also used in the global "DebugLevel"  */
     {"ACPI_LV_AML_DISASSEMBLE",	ACPI_LV_AML_DISASSEMBLE},
     {"ACPI_LV_VERBOSE_INFO",	ACPI_LV_VERBOSE_INFO},
     {"ACPI_LV_FULL_TABLES",	ACPI_LV_FULL_TABLES},
     {"ACPI_LV_EVENTS",		ACPI_LV_EVENTS},
     {"ACPI_LV_VERBOSE",		ACPI_LV_VERBOSE},
     {NULL, 0}
 };    
 
 /*
  * Apply a whitespace-separated list of bit names in `cp' to `*flag'.
  * Every word that names an entry of the NULL-terminated `tag' table has
  * that entry's value ORed into `*flag', or cleared from it when the word
  * is prefixed with '!'.  A word must match an entry's name exactly;
  * words that match no entry are ignored.
  */
 static void
 acpi_parse_debug(char *cp, struct debugtag *tag, UINT32 *flag)
 {
     char	*ep;
     int		i, l;
     int		set;
 
     while (*cp) {
 	if (isspace(*cp)) {
 	    cp++;
 	    continue;
 	}
 	/* [cp, ep) delimits the current word. */
 	ep = cp;
 	while (*ep && !isspace(*ep))
 	    ep++;
 	if (*cp == '!') {
 	    set = 0;
 	    cp++;
 	    /* A lone '!' names nothing. */
 	    if (cp == ep)
 		continue;
 	} else {
 	    set = 1;
 	}
 	l = ep - cp;
 	for (i = 0; tag[i].name != NULL; i++) {
 	    /*
 	     * Compare the whole name rather than only its first l
 	     * characters, so that e.g. "ACPI_LV_INIT" does not also
 	     * select "ACPI_LV_INIT_NAMES".  name[l] is within the string
 	     * whenever the first l characters matched.
 	     */
 	    if (strncmp(cp, tag[i].name, l) != 0 || tag[i].name[l] != '\0')
 		continue;
 	    if (set)
 		*flag |= tag[i].value;
 	    else
 		*flag &= ~tag[i].value;
 	}
 	cp = ep;
     }
 }
 
 /*
  * Load AcpiDbgLayer and AcpiDbgLevel from the debug.acpi.layer and
  * debug.acpi.level kernel environment variables, printing what was
  * requested.  While `cold' is set both masks are cleared first.  Does
  * nothing further when neither variable exists.
  */
 static void
 acpi_set_debugging(void *junk)
 {
     char	*layer_env, *level_env;
 
     if (cold) {
 	AcpiDbgLayer = 0;
 	AcpiDbgLevel = 0;
     }
 
     layer_env = kern_getenv("debug.acpi.layer");
     level_env = kern_getenv("debug.acpi.level");
     if (layer_env == NULL && level_env == NULL)
 	return;
 
     /* One console line; a value of "NONE" is parsed but not echoed. */
     printf("ACPI set debug");
     if (layer_env != NULL) {
 	if (strcmp(layer_env, "NONE") != 0)
 	    printf(" layer '%s'", layer_env);
 	acpi_parse_debug(layer_env, &dbg_layer[0], &AcpiDbgLayer);
 	freeenv(layer_env);
     }
     if (level_env != NULL) {
 	if (strcmp(level_env, "NONE") != 0)
 	    printf(" level '%s'", level_env);
 	acpi_parse_debug(level_env, &dbg_level[0], &AcpiDbgLevel);
 	freeenv(level_env);
     }
     printf("\n");
 }
 
 SYSINIT(acpi_debugging, SI_SUB_TUNABLES, SI_ORDER_ANY, acpi_set_debugging,
 	NULL);
 
 /*
  * Sysctl handler shared by debug.acpi.layer and debug.acpi.level; the
  * name string in oid_arg1 selects which one.  A read reports "NONE" or
  * the names of all table entries whose bits are fully set.  A write
  * stores the new string in the kernel environment under that name and
  * re-runs acpi_set_debugging().  Returns 0 or an errno value.
  */
 static int
 acpi_debug_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int		 error, *dbg;
     struct	 debugtag *tag;
     struct	 sbuf sb;
     char	 temp[128];
 
     if (sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND) == NULL)
 	return (ENOMEM);
     /* Pick the table and the mask that belong to this oid. */
     if (strcmp(oidp->oid_arg1, "debug.acpi.layer") == 0) {
 	tag = &dbg_layer[0];
 	dbg = &AcpiDbgLayer;
     } else {
 	tag = &dbg_level[0];
 	dbg = &AcpiDbgLevel;
     }
 
     /* Get old values if this is a get request. */
     ACPI_SERIAL_BEGIN(acpi);
     if (*dbg == 0) {
 	sbuf_cpy(&sb, "NONE");
     } else if (req->newptr == NULL) {
 	for (; tag->name != NULL; tag++) {
 	    if ((*dbg & tag->value) == tag->value)
 		sbuf_printf(&sb, "%s ", tag->name);
 	}
     }
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     /* temp is a fixed 128 bytes, so a longer list is cut short here. */
     strlcpy(temp, sbuf_data(&sb), sizeof(temp));
     sbuf_delete(&sb);
 
     error = sysctl_handle_string(oidp, temp, sizeof(temp), req);
 
     /* Check for error or no change */
     if (error == 0 && req->newptr != NULL) {
 	/* Clear the mask, then rebuild it from the environment. */
 	*dbg = 0;
 	kern_setenv((char *)oidp->oid_arg1, temp);
 	acpi_set_debugging(NULL);
     }
     ACPI_SERIAL_END(acpi);
 
     return (error);
 }
 
 SYSCTL_PROC(_debug_acpi, OID_AUTO, layer, CTLFLAG_RW | CTLTYPE_STRING,
 	    "debug.acpi.layer", 0, acpi_debug_sysctl, "A", "");
 SYSCTL_PROC(_debug_acpi, OID_AUTO, level, CTLFLAG_RW | CTLTYPE_STRING,
 	    "debug.acpi.level", 0, acpi_debug_sysctl, "A", "");
 #endif /* ACPI_DEBUG */
 
 /*
  * Sysctl handler for acpi_debug_objects.  After a successful write,
  * AcpiGbl_EnableAmlDebugObject is updated only when the value changed
  * between zero and non-zero.
  */
 static int
 acpi_debug_objects_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int	error, previous;
 
 	previous = acpi_debug_objects;
 	error = sysctl_handle_int(oidp, &acpi_debug_objects, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	/* Same truth value as before: nothing to push to ACPICA. */
 	if ((previous != 0) == (acpi_debug_objects != 0))
 		return (0);
 
 	ACPI_SERIAL_BEGIN(acpi);
 	AcpiGbl_EnableAmlDebugObject = acpi_debug_objects ? TRUE : FALSE;
 	ACPI_SERIAL_END(acpi);
 
 	return (0);
 }
 
 /*
  * Split the comma-separated list `str' into names.  On a non-zero return
  * iface->data is an allocated array of iface->num pointers into one
  * private copy of the list.  Leading whitespace and commas are skipped
  * before copying, so data[0] is the start of that copy.  Returns the
  * number of names, or 0 (with `iface' untouched) when there are none.
  */
 static int
 acpi_parse_interfaces(char *str, struct acpi_interface *iface)
 {
 	char *buf, *start;
 	size_t len;
 	int count, pos;
 
 	for (start = str; isspace(*start) || *start == ','; start++)
 		;
 	len = strlen(start);
 	if (len == 0)
 		return (0);
 
 	/* In the copy every comma becomes a string terminator. */
 	buf = strdup(start, M_TEMP);
 	for (pos = 0; pos < len; pos++)
 		if (buf[pos] == ',')
 			buf[pos] = '\0';
 
 	/* First pass: count the names. */
 	count = 0;
 	for (pos = 0; pos < len; ) {
 		if (isspace(buf[pos]) || buf[pos] == '\0') {
 			pos++;
 			continue;
 		}
 		pos += strlen(buf + pos) + 1;
 		count++;
 	}
 	if (count == 0) {
 		free(buf, M_TEMP);
 		return (0);
 	}
 
 	/* Second pass: record where each name starts. */
 	iface->data = malloc(sizeof(*iface->data) * count, M_TEMP, M_WAITOK);
 	iface->num = count;
 	count = 0;
 	for (pos = 0; pos < len; ) {
 		if (isspace(buf[pos]) || buf[pos] == '\0') {
 			pos++;
 			continue;
 		}
 		iface->data[count] = buf + pos;
 		pos += strlen(buf + pos) + 1;
 		count++;
 	}
 
 	return (count);
 }
 
 /*
  * Release a list filled in by acpi_parse_interfaces().  data[0] is freed
  * as the string storage, then the pointer array itself.
  */
 static void
 acpi_free_interfaces(struct acpi_interface *iface)
 {
 
 	free(iface->data[0], M_TEMP);
 	free(iface->data, M_TEMP);
 }
 
 /*
  * Install every _OSI string listed in acpi_install_interface and remove
  * every one listed in acpi_remove_interface.  Failures are always logged
  * against `dev'; successes only when bootverbose is set.
  */
 static void
 acpi_reset_interfaces(device_t dev)
 {
 	struct acpi_interface ifaces;
 	ACPI_STATUS rv;
 	int n;
 
 	if (acpi_parse_interfaces(acpi_install_interface, &ifaces) > 0) {
 		for (n = 0; n < ifaces.num; n++) {
 			rv = AcpiInstallInterface(ifaces.data[n]);
 			if (ACPI_SUCCESS(rv)) {
 				if (bootverbose)
 					device_printf(dev,
 					    "installed _OSI(\"%s\")\n",
 					    ifaces.data[n]);
 				continue;
 			}
 			device_printf(dev,
 			    "failed to install _OSI(\"%s\"): %s\n",
 			    ifaces.data[n], AcpiFormatException(rv));
 		}
 		acpi_free_interfaces(&ifaces);
 	}
 	if (acpi_parse_interfaces(acpi_remove_interface, &ifaces) > 0) {
 		for (n = 0; n < ifaces.num; n++) {
 			rv = AcpiRemoveInterface(ifaces.data[n]);
 			if (ACPI_SUCCESS(rv)) {
 				if (bootverbose)
 					device_printf(dev,
 					    "removed _OSI(\"%s\")\n",
 					    ifaces.data[n]);
 				continue;
 			}
 			device_printf(dev,
 			    "failed to remove _OSI(\"%s\"): %s\n",
 			    ifaces.data[n], AcpiFormatException(rv));
 		}
 		acpi_free_interfaces(&ifaces);
 	}
 }
 
 /*
  * Power management callback.  Only POWER_CMD_SUSPEND is handled: `arg'
  * is the acpi softc and the first variadic argument is the requested
  * POWER_SLEEP_STATE_* value, which is mapped to an ACPI sleep state and
  * entered.  Returns 0, EINVAL for an unknown command, missing softc or
  * unknown state, or ENXIO when entering the state fails.
  */
 static int
 acpi_pm_func(u_long cmd, void *arg, ...)
 {
 	struct	acpi_softc *sc;
 	va_list	ap;
 	int	acpi_state, state;
 
 	if (cmd != POWER_CMD_SUSPEND)
 		return (EINVAL);
 
 	sc = (struct acpi_softc *)arg;
 	if (sc == NULL)
 		return (EINVAL);
 
 	va_start(ap, arg);
 	state = va_arg(ap, int);
 	va_end(ap);
 
 	/* Standby and suspend use the softc's settings; hibernate is S4. */
 	if (state == POWER_SLEEP_STATE_STANDBY)
 		acpi_state = sc->acpi_standby_sx;
 	else if (state == POWER_SLEEP_STATE_SUSPEND)
 		acpi_state = sc->acpi_suspend_sx;
 	else if (state == POWER_SLEEP_STATE_HIBERNATE)
 		acpi_state = ACPI_STATE_S4;
 	else
 		return (EINVAL);
 
 	if (ACPI_FAILURE(acpi_EnterSleepState(sc, acpi_state)))
 		return (ENXIO);
 	return (0);
 }
 
 /*
  * Register acpi_pm_func() as the ACPI power management backend.  Skipped
  * when `cold' is no longer set or when the "acpi" resource is disabled.
  */
 static void
 acpi_pm_register(void *arg)
 {
     if (!cold)
 	return;
     if (resource_disabled("acpi", 0))
 	return;
 
     power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, NULL);
 }
 
 SYSINIT(power, SI_SUB_KLD, SI_ORDER_ANY, acpi_pm_register, 0);
Index: projects/ifnet/sys/netinet6/ip6_forward.c
===================================================================
--- projects/ifnet/sys/netinet6/ip6_forward.c	(revision 281172)
+++ projects/ifnet/sys/netinet6/ip6_forward.c	(revision 281173)
@@ -1,640 +1,627 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_ipstealth.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/pfil.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 
 #include <netinet/in_pcb.h>
 
 #ifdef IPSEC
 #include <netinet6/ip6_ipsec.h>
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #include <netipsec/key.h>
 #endif /* IPSEC */
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  */
 void
 ip6_forward(struct mbuf *m, int srcrt)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct sockaddr_in6 *dst = NULL;
 	struct rtentry *rt = NULL;
 	struct route_in6 rin6;
 	int error, type = 0, code = 0;
 	struct mbuf *mcopy = NULL;
 	struct ifnet *origifp;	/* maybe unnecessary */
 	u_int32_t inzone, outzone;
 	struct in6_addr src_in6, dst_in6, odst;
 #ifdef IPSEC
 	struct secpolicy *sp = NULL;
 #endif
 #ifdef SCTP
 	int sw_csum;
 #endif
 	struct m_tag *fwd_tag;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/*
 	 * Do not forward packets to multicast destination (should be handled
 	 * by ip6_mforward()).
 	 * Do not forward packets with unspecified source.  It was discussed
 	 * in July 2000, on the ipngwg mailing list.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 ||
 	    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 		IP6STAT_INC(ip6s_cantforward);
 		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
 		if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
 			V_ip6_log_time = time_uptime;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "from %s to %s nxt %d received on %s\n",
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    ip6->ip6_nxt,
 			    if_name(m->m_pkthdr.rcvif));
 		}
 		m_freem(m);
 		return;
 	}
 #ifdef IPSEC
 	/*
 	 * Check if this packet has an active SA and needs to be dropped
 	 * instead of forwarded.
 	 */
 	if (ip6_ipsec_fwd(m) != 0) {
 		IP6STAT_INC(ip6s_cantforward);
 		m_freem(m);
 		return;
 	}
 #endif /* IPSEC */
 
 #ifdef IPSTEALTH
 	if (!V_ip6stealth) {
 #endif
 	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
 		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
 		icmp6_error(m, ICMP6_TIME_EXCEEDED,
 				ICMP6_TIME_EXCEED_TRANSIT, 0);
 		return;
 	}
 	ip6->ip6_hlim -= IPV6_HLIMDEC;
 
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU -
 	 * size of IPv6 + ICMPv6 headers) bytes of the packet in case
 	 * we need to generate an ICMP6 message to the src.
 	 * Thanks to M_EXT, in most cases copy will not occur.
 	 *
 	 * It is important to save it before IPsec processing as IPsec
 	 * processing may modify the mbuf.
 	 */
 	mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN));
 
 #ifdef IPSEC
 	/* get a security policy for this packet */
 	sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error);
 	if (sp == NULL) {
 		IPSEC6STAT_INC(ips_out_inval);
 		IP6STAT_INC(ip6s_cantforward);
 		if (mcopy) {
 #if 0
 			/* XXX: what icmp ? */
 #else
 			m_freem(mcopy);
 #endif
 		}
 		m_freem(m);
 		return;
 	}
 
 	error = 0;
 
 	/* check policy */
 	switch (sp->policy) {
 	case IPSEC_POLICY_DISCARD:
 		/*
 		 * This packet is just discarded.
 		 */
 		IPSEC6STAT_INC(ips_out_polvio);
 		IP6STAT_INC(ip6s_cantforward);
 		KEY_FREESP(&sp);
 		if (mcopy) {
 #if 0
 			/* XXX: what icmp ? */
 #else
 			m_freem(mcopy);
 #endif
 		}
 		m_freem(m);
 		return;
 
 	case IPSEC_POLICY_BYPASS:
 	case IPSEC_POLICY_NONE:
 		/* no need to do IPsec. */
 		KEY_FREESP(&sp);
 		goto skip_ipsec;
 
 	case IPSEC_POLICY_IPSEC:
 		if (sp->req == NULL) {
 			/* XXX should be panic ? */
 			printf("ip6_forward: No IPsec request specified.\n");
 			IP6STAT_INC(ip6s_cantforward);
 			KEY_FREESP(&sp);
 			if (mcopy) {
 #if 0
 				/* XXX: what icmp ? */
 #else
 				m_freem(mcopy);
 #endif
 			}
 			m_freem(m);
 			return;
 		}
 		/* do IPsec */
 		break;
 
 	case IPSEC_POLICY_ENTRUST:
 	default:
 		/* should be panic ?? */
 		printf("ip6_forward: Invalid policy found. %d\n", sp->policy);
 		KEY_FREESP(&sp);
 		goto skip_ipsec;
 	}
 
     {
 	struct ipsecrequest *isr = NULL;
 
 	/*
 	 * when the kernel forwards a packet, it is not proper to apply
 	 * IPsec transport mode to the packet.  this check
 	 * avoids that.
 	 * at present, if there is even a transport mode SA request in the
 	 * security policy, the kernel does not apply IPsec to the packet.
 	 * this check is not enough because the following case is valid.
 	 *      ipsec esp/tunnel/xxx-xxx/require esp/transport//require;
 	 */
 	for (isr = sp->req; isr; isr = isr->next) {
 		if (isr->saidx.mode == IPSEC_MODE_ANY)
 			goto doipsectunnel;
 		if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
 			goto doipsectunnel;
 	}
 
 	/*
 	 * if there's no need for tunnel mode IPsec, skip.
 	 */
 	if (!isr)
 		goto skip_ipsec;
 
     doipsectunnel:
 	/*
 	 * All the extension headers will become inaccessible
 	 * (since they can be encrypted).
 	 * Don't panic, we need no more updates to extension headers
 	 * on inner IPv6 packet (since they are now encapsulated).
 	 *
 	 * IPv6 [ESP|AH] IPv6 [extension headers] payload
 	 */
 
 	/*
 	 * If we need to encapsulate the packet, do it here
 	 * ipsec6_process_packet will send the packet using ip6_output
 	 */
 	error = ipsec6_process_packet(m, sp->req);
 
 	KEY_FREESP(&sp);
 
 	if (error == EJUSTRETURN) {
 		/*
 		 * We had a SP with a level of 'use' and no SA. We
 		 * will just continue to process the packet without
 		 * IPsec processing.
 		 */
 		error = 0;
 		goto skip_ipsec;
 	}
 
 	if (error) {
 		/* mbuf is already reclaimed in ipsec6_process_packet. */
 		switch (error) {
 		case EHOSTUNREACH:
 		case ENETUNREACH:
 		case EMSGSIZE:
 		case ENOBUFS:
 		case ENOMEM:
 			break;
 		default:
 			printf("ip6_output (ipsec): error code %d\n", error);
 			/* FALLTHROUGH */
 		case ENOENT:
 			/* don't show these error codes to the user */
 			break;
 		}
 		IP6STAT_INC(ip6s_cantforward);
 		if (mcopy) {
 #if 0
 			/* XXX: what icmp ? */
 #else
 			m_freem(mcopy);
 #endif
 		}
 		return;
 	} else {
 		/*
 		 * In the FAST IPSec case we have already
 		 * re-injected the packet and it has been freed
 		 * by the ipsec_done() function.  So, just clean
 		 * up after ourselves.
 		 */
 		m = NULL;
 		goto freecopy;
 	}
     }
 skip_ipsec:
 #endif
 again:
 	bzero(&rin6, sizeof(struct route_in6));
 	dst = (struct sockaddr_in6 *)&rin6.ro_dst;
 	dst->sin6_len = sizeof(struct sockaddr_in6);
 	dst->sin6_family = AF_INET6;
 	dst->sin6_addr = ip6->ip6_dst;
 again2:
 	rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m));
 	if (rin6.ro_rt != NULL)
 		RT_UNLOCK(rin6.ro_rt);
 	else {
 		IP6STAT_INC(ip6s_noroute);
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
 		if (mcopy) {
 			icmp6_error(mcopy, ICMP6_DST_UNREACH,
 			ICMP6_DST_UNREACH_NOROUTE, 0);
 		}
 		goto bad;
 	}
 	rt = rin6.ro_rt;
 
 	/*
 	 * Source scope check: if a packet can't be delivered to its
 	 * destination for the reason that the destination is beyond the scope
 	 * of the source address, discard the packet and return an icmp6
 	 * destination unreachable error with Code 2 (beyond scope of source
 	 * address).  We use a local copy of ip6_src, since in6_setscope()
 	 * will possibly modify its first argument.
 	 * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1]
 	 */
 	src_in6 = ip6->ip6_src;
 	if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) {
 		/* XXX: this should not happen */
 		IP6STAT_INC(ip6s_cantforward);
 		IP6STAT_INC(ip6s_badscope);
 		goto bad;
 	}
 	if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) {
 		IP6STAT_INC(ip6s_cantforward);
 		IP6STAT_INC(ip6s_badscope);
 		goto bad;
 	}
 	if (inzone != outzone) {
 		IP6STAT_INC(ip6s_cantforward);
 		IP6STAT_INC(ip6s_badscope);
 		in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
 
 		if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
 			V_ip6_log_time = time_uptime;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    ip6->ip6_nxt,
 			    if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp));
 		}
 		if (mcopy)
 			icmp6_error(mcopy, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
 		goto bad;
 	}
 
 	/*
 	 * Destination scope check: if a packet is going to break the scope
 	 * zone of packet's destination address, discard it.  This case should
 	 * usually be prevented by appropriately-configured routing table, but
 	 * we need an explicit check because we may mistakenly forward the
 	 * packet to a different zone by (e.g.) a default route.
 	 */
 	dst_in6 = ip6->ip6_dst;
 	if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 ||
 	    in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 ||
 	    inzone != outzone) {
 		IP6STAT_INC(ip6s_cantforward);
 		IP6STAT_INC(ip6s_badscope);
 		goto bad;
 	}
 
 	if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
 		in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
 		if (mcopy) {
 			u_long mtu;
 #ifdef IPSEC
 			size_t ipsechdrsiz;
 #endif /* IPSEC */
 
 			mtu = IN6_LINKMTU(rt->rt_ifp);
 #ifdef IPSEC
 			/*
 			 * When we do IPsec tunnel ingress, we need to play
 			 * with the link value (decrement IPsec header size
 			 * from mtu value).  The code is much simpler than v4
 			 * case, as we have the outgoing interface for
 			 * encapsulated packet as "rt->rt_ifp".
 			 */
 			ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND,
 			    NULL);
 			if (ipsechdrsiz < mtu)
 				mtu -= ipsechdrsiz;
 			/*
 			 * if mtu becomes less than minimum MTU,
 			 * tell minimum MTU (and I'll need to fragment it).
 			 */
 			if (mtu < IPV6_MMTU)
 				mtu = IPV6_MMTU;
 #endif /* IPSEC */
 			icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
 		}
 		goto bad;
 	}
 
 	if (rt->rt_flags & RTF_GATEWAY)
 		dst = (struct sockaddr_in6 *)rt->rt_gateway;
 
 	/*
 	 * If we are to forward the packet using the same interface
 	 * as one we got the packet from, perhaps we should send a redirect
 	 * to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a route
 	 * modified by a redirect.
 	 */
 	if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
 		if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
 			/*
 			 * If the incoming interface is equal to the outgoing
 			 * one, and the link attached to the interface is
 			 * point-to-point, then it will be highly probable
 			 * that a routing loop occurs. Thus, we immediately
 			 * drop the packet and send an ICMPv6 error message.
 			 *
 			 * type/code is based on suggestion by Rich Draves.
 			 * not sure if it is the best pick.
 			 */
 			icmp6_error(mcopy, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_ADDR, 0);
 			goto bad;
 		}
 		type = ND_REDIRECT;
 	}
 
 	/*
 	 * Fake scoped addresses. Note that even link-local source or
 	 * destination can appear, if the originating node just sends the
 	 * packet to us (without address resolution for the destination).
 	 * Since both icmp6_error and icmp6_redirect_output fill the embedded
 	 * link identifiers, we can do this stuff after making a copy for
 	 * returning an error.
 	 */
 	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
 		/*
 		 * See corresponding comments in ip6_output.
 		 * XXX: but is it possible that ip6_forward() sends a packet
 		 *      to a loopback interface? I don't think so, and thus
 		 *      I bark here. (jinmei@kame.net)
 		 * XXX: it is common to route invalid packets to loopback.
 		 *	also, the codepath will be visited on use of ::1 in
 		 *	rthdr. (itojun)
 		 */
 #if 1
 		if (0)
 #else
 		if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0)
 #endif
 		{
 			printf("ip6_forward: outgoing interface is loopback. "
 			       "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
 			       ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			       ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			       ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif),
 			       if_name(rt->rt_ifp));
 		}
 
 		/* we can just use rcvif in forwarding. */
 		origifp = m->m_pkthdr.rcvif;
 	}
 	else
 		origifp = rt->rt_ifp;
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&V_inet6_pfil_hook))
 		goto pass;
 
 	odst = ip6->ip6_dst;
 	/* Run through list of hooks for output packets. */
 	error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL);
 	if (error != 0 || m == NULL)
 		goto freecopy;		/* consumed by filter */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* See if destination IP address was changed by packet filter. */
 	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip6_input(). */
-		if (in6_localip(&ip6->ip6_dst)) {
+		if (in6_localip(&ip6->ip6_dst))
 			m->m_flags |= M_FASTFWD_OURS;
-			if (m->m_pkthdr.rcvif == NULL)
-				m->m_pkthdr.rcvif = V_loif;
-			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
-				m->m_pkthdr.csum_flags |=
-				    CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
-				m->m_pkthdr.csum_data = 0xffff;
-			}
-#ifdef SCTP
-			if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
-				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
-#endif
-			error = netisr_queue(NETISR_IPV6, m);
-			goto out;
-		} else
+		else
 			goto again;	/* Redo the routing table lookup. */
 	}
 
 	/* See if local, if yes, send it to netisr. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = V_loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
 			m->m_pkthdr.csum_flags |=
 			    CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 #ifdef SCTP
 		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 		error = netisr_queue(NETISR_IPV6, m);
 		goto out;
 	}
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP6_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		dst = (struct sockaddr_in6 *)&rin6.ro_dst;
 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP6_NEXTHOP;
 		m_tag_delete(m, fwd_tag);
 		goto again2;
 	}
 
 pass:
 	error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
 	if (error) {
 		in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
 		IP6STAT_INC(ip6s_cantforward);
 	} else {
 		IP6STAT_INC(ip6s_forward);
 		in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward);
 		if (type)
 			IP6STAT_INC(ip6s_redirectsent);
 		else {
 			if (mcopy)
 				goto freecopy;
 		}
 	}
 
 	if (mcopy == NULL)
 		goto out;
 	switch (error) {
 	case 0:
 		if (type == ND_REDIRECT) {
 			icmp6_redirect_output(mcopy, rt);
 			goto out;
 		}
 		goto freecopy;
 
 	case EMSGSIZE:
 		/* xxx MTU is constant in PPP? */
 		goto freecopy;
 
 	case ENOBUFS:
 		/* Tell source to slow down like source quench in IP? */
 		goto freecopy;
 
 	case ENETUNREACH:	/* shouldn't happen, checked above */
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP6_DST_UNREACH;
 		code = ICMP6_DST_UNREACH_ADDR;
 		break;
 	}
 	icmp6_error(mcopy, type, code, 0);
 	goto out;
 
  freecopy:
 	m_freem(mcopy);
 	goto out;
 bad:
 	m_freem(m);
 out:
 	if (rt != NULL)
 		RTFREE(rt);
 }
Index: projects/ifnet/sys/netinet6/ip6_mroute.c
===================================================================
--- projects/ifnet/sys/netinet6/ip6_mroute.c	(revision 281172)
+++ projects/ifnet/sys/netinet6/ip6_mroute.c	(revision 281173)
@@ -1,1951 +1,1976 @@
 /*-
  * Copyright (C) 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $
  */
 
 /*-
  * Copyright (c) 1989 Stephen Deering
  * Copyright (c) 1992, 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_mroute.c	8.2 (Berkeley) 11/15/93
  *	BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp
  */
 
 /*
  * IP multicast forwarding procedures
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  * Modified by Van Jacobson, LBL, January 1993
  * Modified by Ajit Thyagarajan, PARC, August 1993
  * Modified by Bill Fenner, PARC, April 1994
  *
  * MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/callout.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/raw_cb.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_encap.h>
 
 #include <netinet/ip6.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/pim6.h>
 #include <netinet6/pim6_var.h>
 
 static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
 
 static int	ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
 static void	phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
 static int	register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
 static int	set_pim6(int *);
 static int	socket_send(struct socket *, struct mbuf *,
 		    struct sockaddr_in6 *);
 
 extern int in6_mcast_loop;
 extern struct domain inet6domain;
 
 static const struct encaptab *pim6_encap_cookie;
 static const struct protosw in6_pim_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_PIM,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		pim6_input,
 	.pr_output =		rip6_output,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_usrreqs =		&rip6_usrreqs
 };
 static int pim6_encapcheck(const struct mbuf *, int, int, void *);
 
 static VNET_DEFINE(int, ip6_mrouter_ver) = 0;
 #define	V_ip6_mrouter_ver	VNET(ip6_mrouter_ver)
 
 SYSCTL_DECL(_net_inet6);
 SYSCTL_DECL(_net_inet6_ip6);
 static SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
 
 static struct mrt6stat mrt6stat;
 SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
     &mrt6stat, mrt6stat,
     "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
 
 #define	MRT6STAT_INC(name)	mrt6stat.name += 1
 #define NO_RTE_FOUND	0x1
 #define RTE_FOUND	0x2
 
 /*
  * Mutex taken by ip6_mrouter_init() and X_ip6_mrouter_done() while they
  * install or tear down the multicast router socket and its state.
  * MROUTER6_LOCK_ASSERT() checks that the mutex is owned by the caller.
  */
 static struct mtx mrouter6_mtx;
 #define	MROUTER6_LOCK()		mtx_lock(&mrouter6_mtx)
 #define	MROUTER6_UNLOCK()	mtx_unlock(&mrouter6_mtx)
 #define	MROUTER6_LOCK_ASSERT()	do {					\
 	mtx_assert(&mrouter6_mtx, MA_OWNED);				\
 	NET_ASSERT_GIANT();						\
 } while (0)
 #define	MROUTER6_LOCK_INIT()	\
 	mtx_init(&mrouter6_mtx, "IPv6 multicast forwarding", NULL, MTX_DEF)
 #define	MROUTER6_LOCK_DESTROY()	mtx_destroy(&mrouter6_mtx)
 
 static struct mf6c *mf6ctable[MF6CTBLSIZ];
 SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD,
     &mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]",
     "IPv6 Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
     "netinet6/ip6_mroute.h)");
 
 /*
  * Mutex held around lookups in and modifications of the forwarding cache
  * hash table mf6ctable[] (see get_sg_cnt(), add_m6fc() and
  * X_ip6_mrouter_done()).
  */
 static struct mtx mfc6_mtx;
 #define	MFC6_LOCK()		mtx_lock(&mfc6_mtx)
 #define	MFC6_UNLOCK()		mtx_unlock(&mfc6_mtx)
 #define	MFC6_LOCK_ASSERT()	do {					\
 	mtx_assert(&mfc6_mtx, MA_OWNED);				\
 	NET_ASSERT_GIANT();						\
 } while (0)
 #define	MFC6_LOCK_INIT()		\
 	mtx_init(&mfc6_mtx, "IPv6 multicast forwarding cache", NULL, MTX_DEF)
 #define	MFC6_LOCK_DESTROY()	mtx_destroy(&mfc6_mtx)
 
 static u_char n6expire[MF6CTBLSIZ];
 
 static struct mif6 mif6table[MAXMIFS];
-SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
-    &mif6table, sizeof(mif6table), "S,mif6[MAXMIFS]",
-    "IPv6 Multicast Interfaces (struct mif6[MAXMIFS], netinet6/ip6_mroute.h)");
+/*
+ * Sysctl handler exporting the multicast interface table to userland.
+ *
+ * Every mif6table[] slot is converted to a struct mif6_sctl, with the
+ * kernel ifnet pointer replaced by the interface index (0 when the slot
+ * has no interface attached), and the complete MAXMIFS-element array is
+ * handed to SYSCTL_OUT().
+ *
+ * Returns the SYSCTL_OUT() result.
+ */
+static int
+sysctl_mif6table(SYSCTL_HANDLER_ARGS)
+{
+	struct mif6_sctl *out;
+	int error;
+
+	/*
+	 * Zero the buffer: only individual members are assigned below, so
+	 * without M_ZERO any padding or member not listed here would carry
+	 * stale kernel heap contents out to userland.
+	 */
+	out = malloc(sizeof(struct mif6_sctl) * MAXMIFS, M_TEMP,
+	    M_WAITOK | M_ZERO);
+	for (int i = 0; i < MAXMIFS; i++) {
+		out[i].m6_flags		= mif6table[i].m6_flags;
+		out[i].m6_rate_limit	= mif6table[i].m6_rate_limit;
+		out[i].m6_lcl_addr	= mif6table[i].m6_lcl_addr;
+		/* Export the interface index rather than a kernel pointer. */
+		if (mif6table[i].m6_ifp != NULL)
+			out[i].m6_ifp	= mif6table[i].m6_ifp->if_index;
+		else
+			out[i].m6_ifp	= 0;
+		out[i].m6_pkt_in	= mif6table[i].m6_pkt_in;
+		out[i].m6_pkt_out	= mif6table[i].m6_pkt_out;
+		out[i].m6_bytes_in	= mif6table[i].m6_bytes_in;
+		out[i].m6_bytes_out	= mif6table[i].m6_bytes_out;
+	}
+	error = SYSCTL_OUT(req, out, sizeof(struct mif6_sctl) * MAXMIFS);
+	free(out, M_TEMP);
+	return (error);
+}
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mif6table, CTLTYPE_OPAQUE | CTLFLAG_RD,
+    NULL, 0, sysctl_mif6table, "S,mif6_sctl[MAXMIFS]",
+    "IPv6 Multicast Interfaces (struct mif6_sctl[MAXMIFS], "
+    "netinet6/ip6_mroute.h)");
 
 /*
  * Mutex held while mif6table[] and nummifs are read or changed by
  * get_mif6_cnt(), add_m6if() and del_m6if().
  */
 static struct mtx mif6_mtx;
 #define	MIF6_LOCK()		mtx_lock(&mif6_mtx)
 #define	MIF6_UNLOCK()		mtx_unlock(&mif6_mtx)
 #define	MIF6_LOCK_ASSERT()	mtx_assert(&mif6_mtx, MA_OWNED)
 #define	MIF6_LOCK_INIT()	\
 	mtx_init(&mif6_mtx, "IPv6 multicast interfaces", NULL, MTX_DEF)
 #define	MIF6_LOCK_DESTROY()	mtx_destroy(&mif6_mtx)
 
 #ifdef MRT6DEBUG
 static VNET_DEFINE(u_int, mrt6debug) = 0;	/* debug level */
 #define	V_mrt6debug		VNET(mrt6debug)
 #define DEBUG_MFC	0x02
 #define DEBUG_FORWARD	0x04
 #define DEBUG_EXPIRE	0x08
 #define DEBUG_XMIT	0x10
 #define DEBUG_REG	0x20
 #define DEBUG_PIM	0x40
 #define	DEBUG_ERR	0x80
 #define	DEBUG_ANY	0x7f
 #define	MRT6_DLOG(m, fmt, ...)	\
 	if (V_mrt6debug & (m))	\
 		log(((m) & DEBUG_ERR) ? LOG_ERR: LOG_DEBUG, \
 		    "%s: " fmt "\n", __func__, ##__VA_ARGS__)
 #else
 #define	MRT6_DLOG(m, fmt, ...)
 #endif
 
 static void	expire_upcalls(void *);
 #define	EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
 #define	UPCALL_EXPIRE	6		/* number of timeouts */
 
 /*
  * XXX TODO: maintain a count to if_allmulti() calls in struct ifnet.
  */
 
 /*
  * 'Interfaces' associated with decapsulator (so we can tell
  * packets that went through it from ones that get reflected
  * by a broken gateway).  Different from IPv4 register_if,
  * these interfaces are linked into the system ifnet list,
  * because per-interface IPv6 statistics are maintained in
  * ifp->if_afdata.  But it does not have any routes point
  * to them.  I.e., packets can't be sent this way.  They
  * only exist as a placeholder for multicast source
  * verification.
  */
 static struct ifnet *multicast_register_if6;
 
 #define ENCAP_HOPS 64
 
 /*
  * Private variables.
  */
 static mifi_t nummifs = 0;
 static mifi_t reg_mif_num = (mifi_t)-1;
 
 static struct pim6stat pim6stat;
 SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW,
     &pim6stat, pim6stat,
     "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)");
 
 #define	PIM6STAT_INC(name)	pim6stat.name += 1
 static VNET_DEFINE(int, pim6);
 #define	V_pim6		VNET(pim6)
 
 /*
  * Hash function for a source, group entry
  */
 #define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \
 				   (a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \
 				   (g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \
 				   (g).s6_addr32[2] ^ (g).s6_addr32[3])
 
 /*
  * Find a route for a given origin IPv6 address and Multicast group address.
  */
 #define MF6CFIND(o, g, rt) do { \
 	struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
 	rt = NULL; \
 	while (_rt) { \
 		if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \
 		    IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \
 		    (_rt->mf6c_stall == NULL)) { \
 			rt = _rt; \
 			break; \
 		} \
 		_rt = _rt->mf6c_next; \
 	} \
 	if (rt == NULL) { \
 		MRT6STAT_INC(mrt6s_mfc_misses); \
 	} \
 } while (/*CONSTCOND*/ 0)
 
 /*
  * Macros to compute elapsed time efficiently
  * Borrowed from Van Jacobson's scheduling code
  * XXX: replace with timersub() ?
  */
 #define TV_DELTA(a, b, delta) do { \
 	    int xxs; \
 		\
 	    delta = (a).tv_usec - (b).tv_usec; \
 	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
 	       switch (xxs) { \
 		      case 2: \
 			  delta += 1000000; \
 			      /* FALLTHROUGH */ \
 		      case 1: \
 			  delta += 1000000; \
 			  break; \
 		      default: \
 			  delta += (1000000 * xxs); \
 	       } \
 	    } \
 } while (/*CONSTCOND*/ 0)
 
 /* XXX: replace with timercmp(a, b, <) ? */
 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
 	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
 
 #ifdef UPCALL_TIMING
 #define UPCALL_MAX	50
 static u_long upcall_data[UPCALL_MAX + 1];
 static void collate();
 #endif /* UPCALL_TIMING */
 
 static int ip6_mrouter_init(struct socket *, int, int);
 static int add_m6fc(struct mf6cctl *);
 static int add_m6if(struct mif6ctl *);
 static int del_m6fc(struct mf6cctl *);
 static int del_m6if(mifi_t *);
 static int del_m6if_locked(mifi_t *);
 static int get_mif6_cnt(struct sioc_mif_req6 *);
 static int get_sg_cnt(struct sioc_sg_req6 *);
 
 static struct callout expire_upcalls_ch;
 
 int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
 int X_ip6_mrouter_done(void);
 int X_ip6_mrouter_set(struct socket *, struct sockopt *);
 int X_ip6_mrouter_get(struct socket *, struct sockopt *);
 int X_mrt6_ioctl(u_long, caddr_t);
 
 /*
  * setsockopt() entry point for the MRT6_* multicast routing options.
  *
  * Apart from MRT6_INIT, requests are honoured only on the socket that is
  * currently registered as the multicast router; any other socket gets
  * EPERM.  Each option copies its argument in from the sockopt and passes
  * it to the matching worker.  Unknown options yield EOPNOTSUPP.
  */
 int
 X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt)
 {
 	struct mf6cctl mfcc;
 	struct mif6ctl mifc;
 	mifi_t mifi;
 	int optval;
 	int error = 0;
 
 	if (sopt->sopt_name != MRT6_INIT && so != V_ip6_mrouter)
 		return (EPERM);
 
 	switch (sopt->sopt_name) {
 	case MRT6_INIT:
 #ifdef MRT6_OINIT
 	case MRT6_OINIT:
 #endif
 		error = sooptcopyin(sopt, &optval, sizeof(optval),
 		    sizeof(optval));
 		if (error == 0)
 			error = ip6_mrouter_init(so, optval, sopt->sopt_name);
 		break;
 
 	case MRT6_DONE:
 		error = X_ip6_mrouter_done();
 		break;
 
 	case MRT6_ADD_MIF:
 		error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc));
 		if (error == 0)
 			error = add_m6if(&mifc);
 		break;
 
 	case MRT6_ADD_MFC:
 		error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
 		if (error == 0)
 			error = add_m6fc(&mfcc);
 		break;
 
 	case MRT6_DEL_MFC:
 		error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
 		if (error == 0)
 			error = del_m6fc(&mfcc);
 		break;
 
 	case MRT6_DEL_MIF:
 		error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi));
 		if (error == 0)
 			error = del_m6if(&mifi);
 		break;
 
 	case MRT6_PIM:
 		error = sooptcopyin(sopt, &optval, sizeof(optval),
 		    sizeof(optval));
 		if (error == 0)
 			error = set_pim6(&optval);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * getsockopt() entry point for the MRT6_* multicast routing options.
  *
  * Only the socket registered as the multicast router may query; others
  * get EACCES.  MRT6_PIM copies out the current PIM setting.  Any other
  * option name is accepted and returns 0 without copying anything out.
  */
 int
 X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt)
 {
 
 	if (so != V_ip6_mrouter)
 		return (EACCES);
 
 	if (sopt->sopt_name == MRT6_PIM)
 		return (sooptcopyout(sopt, &V_pim6, sizeof(V_pim6)));
 
 	return (0);
 }
 
 /*
  * ioctl() entry point for reading multicast forwarding counters.
  *
  * SIOCGETSGCNT_IN6 is answered by get_sg_cnt() and SIOCGETMIFCNT_IN6 by
  * get_mif6_cnt(); their return value is passed through.  Every other
  * command is rejected with EINVAL.
  */
 int
 X_mrt6_ioctl(u_long cmd, caddr_t data)
 {
 
 	if (cmd == SIOCGETSGCNT_IN6)
 		return (get_sg_cnt((struct sioc_sg_req6 *)data));
 
 	if (cmd == SIOCGETMIFCNT_IN6)
 		return (get_mif6_cnt((struct sioc_mif_req6 *)data));
 
 	return (EINVAL);
 }
 
 /*
  * returns the packet, byte, rpf-failure count for the source group provided
  */
 static int
 get_sg_cnt(struct sioc_sg_req6 *req)
 {
 	struct mf6c *rt;
 	int ret;
 
 	ret = 0;
 
 	MFC6_LOCK();
 
 	MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt);
 	if (rt == NULL) {
 		ret = ESRCH;
 	} else {
 		req->pktcnt = rt->mf6c_pkt_cnt;
 		req->bytecnt = rt->mf6c_byte_cnt;
 		req->wrong_if = rt->mf6c_wrong_if;
 	}
 
 	MFC6_UNLOCK();
 
 	return (ret);
 }
 
 /*
  * returns the input and output packet and byte counts on the mif provided
  */
 static int
 get_mif6_cnt(struct sioc_mif_req6 *req)
 {
 	mifi_t mifi;
 	int ret;
 
 	ret = 0;
 	mifi = req->mifi;
 
 	MIF6_LOCK();
 
 	if (mifi >= nummifs) {
 		ret = EINVAL;
 	} else {
 		req->icount = mif6table[mifi].m6_pkt_in;
 		req->ocount = mif6table[mifi].m6_pkt_out;
 		req->ibytes = mif6table[mifi].m6_bytes_in;
 		req->obytes = mif6table[mifi].m6_bytes_out;
 	}
 
 	MIF6_UNLOCK();
 
 	return (ret);
 }
 
 /*
  * Set the PIM switch from *i.  Only 0 and 1 are accepted; any other
  * value is rejected with EINVAL and the current setting is kept.
  */
 static int
 set_pim6(int *i)
 {
 	const int val = *i;
 
 	if (val != 0 && val != 1)
 		return (EINVAL);
 
 	V_pim6 = val;
 	return (0);
 }
 
 /*
  * Enable multicast routing
  *
  * Registers 'so' as the multicast router socket, clears the forwarding
  * cache table and its expiry counters, turns PIM off and starts the
  * periodic expire_upcalls() callout.
  *
  * so:	socket issuing the request; must be a raw ICMPv6 socket.
  * v:	option value supplied by the caller; only 1 is accepted.
  * cmd:	sockopt name used for the request, recorded in V_ip6_mrouter_ver.
  *
  * Returns 0 on success, EOPNOTSUPP for a socket of the wrong type or
  * protocol, ENOPROTOOPT when v != 1, and EADDRINUSE when a router socket
  * is already registered.
  */
 static int
 ip6_mrouter_init(struct socket *so, int v, int cmd)
 {
 
 	MRT6_DLOG(DEBUG_ANY, "so_type = %d, pr_protocol = %d",
 	    so->so_type, so->so_proto->pr_protocol);
 
 	if (so->so_type != SOCK_RAW ||
 	    so->so_proto->pr_protocol != IPPROTO_ICMPV6)
 		return (EOPNOTSUPP);
 
 	if (v != 1)
 		return (ENOPROTOOPT);
 
 	MROUTER6_LOCK();
 
 	/* Only one multicast router socket may be registered at a time. */
 	if (V_ip6_mrouter != NULL) {
 		MROUTER6_UNLOCK();
 		return (EADDRINUSE);
 	}
 
 	V_ip6_mrouter = so;
 	V_ip6_mrouter_ver = cmd;
 
 	/* Start from an empty forwarding cache and expiry bookkeeping. */
 	bzero((caddr_t)mf6ctable, sizeof(mf6ctable));
 	bzero((caddr_t)n6expire, sizeof(n6expire));
 
 	V_pim6 = 0;/* used for stubbing out/in pim stuff */
 
 	/* Arm the timer that runs expire_upcalls() after EXPIRE_TIMEOUT. */
 	callout_init(&expire_upcalls_ch, 0);
 	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
 	    expire_upcalls, NULL);
 
 	MROUTER6_UNLOCK();
 	MRT6_DLOG(DEBUG_ANY, "finished");
 
 	return (0);
 }
 
 /*
  * Disable IPv6 multicast forwarding.
  *
  * Undoes ip6_mrouter_init() and everything configured since: leaves
  * all-multicast mode on every physical mif, clears the mif table, stops
  * the expiry callout, frees every forwarding cache entry together with
  * its queued packets, destroys the register interface if one exists and
  * finally unregisters the router socket.
  *
  * Returns 0 on success, or EINVAL when no router socket is registered.
  */
 int
 X_ip6_mrouter_done(void)
 {
 	mifi_t mifi;
 	u_long i;
 	struct mf6c *rt;
 	struct rtdetq *rte;
 
 	MROUTER6_LOCK();
 
 	if (V_ip6_mrouter == NULL) {
 		MROUTER6_UNLOCK();
 		return (EINVAL);
 	}
 
 	/*
 	 * For each phyint in use, disable promiscuous reception of all IPv6
 	 * multicasts.
 	 */
 	for (mifi = 0; mifi < nummifs; mifi++) {
 		/* Register mifs never had all-multicast enabled; skip them. */
 		if (mif6table[mifi].m6_ifp &&
 		    !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
 			if_allmulti(mif6table[mifi].m6_ifp, 0);
 		}
 	}
 	bzero((caddr_t)mif6table, sizeof(mif6table));
 	nummifs = 0;
 
 	V_pim6 = 0; /* used to stub out/in pim specific code */
 
 	callout_stop(&expire_upcalls_ch);
 
 	/*
 	 * Free all multicast forwarding cache entries.
 	 */
 	MFC6_LOCK();
 	for (i = 0; i < MF6CTBLSIZ; i++) {
 		rt = mf6ctable[i];
 		while (rt) {
 			struct mf6c *frt;
 
 			/* Drop packets still queued on this entry. */
 			for (rte = rt->mf6c_stall; rte != NULL; ) {
 				struct rtdetq *n = rte->next;
 
 				m_freem(rte->m);
 				free(rte, M_MRTABLE6);
 				rte = n;
 			}
 			/* Advance before freeing so the chain stays walkable. */
 			frt = rt;
 			rt = rt->mf6c_next;
 			free(frt, M_MRTABLE6);
 		}
 	}
 	bzero((caddr_t)mf6ctable, sizeof(mf6ctable));
 	MFC6_UNLOCK();
 
 	/*
 	 * Reset register interface
 	 */
 	if (reg_mif_num != (mifi_t)-1 && multicast_register_if6 != NULL) {
 		if_detach(multicast_register_if6);
 		if_free(multicast_register_if6);
 		reg_mif_num = (mifi_t)-1;
 		multicast_register_if6 = NULL;
 	}
 
 	/* Unregister the router socket last. */
 	V_ip6_mrouter = NULL;
 	V_ip6_mrouter_ver = 0;
 
 	MROUTER6_UNLOCK();
 	MRT6_DLOG(DEBUG_ANY, "finished");
 
 	return (0);
 }
 
 static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
 
 /*
  * Add a mif to the mif table
  */
 static int
 add_m6if(struct mif6ctl *mifcp)
 {
 	struct mif6 *mifp;
 	struct ifnet *ifp;
 	int error;
 
 	MIF6_LOCK();
 
 	if (mifcp->mif6c_mifi >= MAXMIFS) {
 		MIF6_UNLOCK();
 		return (EINVAL);
 	}
 	mifp = mif6table + mifcp->mif6c_mifi;
 	if (mifp->m6_ifp != NULL) {
 		MIF6_UNLOCK();
 		return (EADDRINUSE); /* XXX: is it appropriate? */
 	}
 	if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index) {
 		MIF6_UNLOCK();
 		return (ENXIO);
 	}
 
 	ifp = ifnet_byindex(mifcp->mif6c_pifi);
 
 	if (mifcp->mif6c_flags & MIFF_REGISTER) {
 		if (reg_mif_num == (mifi_t)-1) {
 			ifp = if_alloc(IFT_OTHER);
 
 			if_initname(ifp, "register_mif", 0);
 			ifp->if_flags |= IFF_LOOPBACK;
 			if_attach(ifp);
 			multicast_register_if6 = ifp;
 			reg_mif_num = mifcp->mif6c_mifi;
 			/*
 			 * it is impossible to guess the ifindex of the
 			 * register interface.  So mif6c_pifi is automatically
 			 * calculated.
 			 */
 			mifcp->mif6c_pifi = ifp->if_index;
 		} else {
 			ifp = multicast_register_if6;
 		}
 	} else {
 		/* Make sure the interface supports multicast */
 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 			MIF6_UNLOCK();
 			return (EOPNOTSUPP);
 		}
 
 		error = if_allmulti(ifp, 1);
 		if (error) {
 			MIF6_UNLOCK();
 			return (error);
 		}
 	}
 
 	mifp->m6_flags     = mifcp->mif6c_flags;
 	mifp->m6_ifp       = ifp;
 
 	/* initialize per mif pkt counters */
 	mifp->m6_pkt_in    = 0;
 	mifp->m6_pkt_out   = 0;
 	mifp->m6_bytes_in  = 0;
 	mifp->m6_bytes_out = 0;
 
 	/* Adjust nummifs up if the mifi is higher than nummifs */
 	if (nummifs <= mifcp->mif6c_mifi)
 		nummifs = mifcp->mif6c_mifi + 1;
 
 	MIF6_UNLOCK();
 	MRT6_DLOG(DEBUG_ANY, "mif #%d, phyint %s", mifcp->mif6c_mifi,
 	    if_name(ifp));
 
 	return (0);
 }
 
 /*
  * Delete a mif from the mif table
  *
  * The caller must hold the mif6 mutex.  *mifip selects the slot to
  * remove.  A physical mif leaves all-multicast mode; a register mif has
  * its pseudo-interface detached and freed.  The slot is then cleared and
  * nummifs is lowered to one past the highest slot still in use.
  *
  * Returns 0 on success, or EINVAL when the index is not below nummifs or
  * the slot is empty.
  */
 static int
 del_m6if_locked(mifi_t *mifip)
 {
 	struct mif6 *mifp;
 	mifi_t mifi;
 
 	MIF6_LOCK_ASSERT();
 
 	/*
 	 * Validate the index before forming a pointer into mif6table[], so
 	 * that no out-of-range pointer is ever computed.
 	 */
 	if (*mifip >= nummifs)
 		return (EINVAL);
 	mifp = mif6table + *mifip;
 	if (mifp->m6_ifp == NULL)
 		return (EINVAL);
 
 	if (!(mifp->m6_flags & MIFF_REGISTER)) {
 		/* XXX: TODO: Maintain an ALLMULTI refcount in struct ifnet. */
 		if_allmulti(mifp->m6_ifp, 0);
 	} else {
 		if (reg_mif_num != (mifi_t)-1 &&
 		    multicast_register_if6 != NULL) {
 			if_detach(multicast_register_if6);
 			if_free(multicast_register_if6);
 			reg_mif_num = (mifi_t)-1;
 			multicast_register_if6 = NULL;
 		}
 	}
 
 	bzero((caddr_t)mifp, sizeof(*mifp));
 
 	/* Adjust nummifs down */
 	for (mifi = nummifs; mifi > 0; mifi--)
 		if (mif6table[mifi - 1].m6_ifp)
 			break;
 	nummifs = mifi;
 	MRT6_DLOG(DEBUG_ANY, "mif %d, nummifs %d", *mifip, nummifs);
 
 	return (0);
 }
 
 /*
  * Locking wrapper around del_m6if_locked(): takes the mif6 mutex, removes
  * the mif selected by *mifip and passes the worker's result back.
  */
 static int
 del_m6if(mifi_t *mifip)
 {
 	int error;
 
 	MIF6_LOCK();
 	error = del_m6if_locked(mifip);
 	MIF6_UNLOCK();
 	return (error);
 }
 
 /*
  * Add or update a multicast forwarding cache (MFC) entry.
  *
  * mfccp supplies the origin, multicast group, parent mif and outgoing
  * interface set.  The whole operation runs under MFC6_LOCK.  Cases are
  * handled in this order:
  *
  *  1. MF6CFIND yields an entry: only its parent and ifset are updated.
  *  2. One or more entries on the hash chain match origin/group and have
  *     packets queued on mf6c_stall (an upcall was pending): the entry is
  *     filled in, its counters reset, and every queued packet is passed
  *     to ip6_mdq() and then freed.
  *  3. Nothing was stalled: a matching entry on the chain is refreshed in
  *     place, or, if there is none, a new entry is allocated and linked
  *     at the head of the hash chain.
  *
  * Returns 0 on success, or ENOBUFS if a new entry cannot be allocated.
  */
 static int
 add_m6fc(struct mf6cctl *mfccp)
 {
 	struct mf6c *rt;
 	u_long hash;
 	struct rtdetq *rte;
 	u_short nstl;
 	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
 
 	MFC6_LOCK();
 
 	MF6CFIND(mfccp->mf6cc_origin.sin6_addr,
 		 mfccp->mf6cc_mcastgrp.sin6_addr, rt);
 
 	/* If an entry already exists, just update the fields */
 	if (rt) {
 		MRT6_DLOG(DEBUG_MFC, "no upcall o %s g %s p %x",
 		    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
 		    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
 		    mfccp->mf6cc_parent);
 
 		rt->mf6c_parent = mfccp->mf6cc_parent;
 		rt->mf6c_ifset = mfccp->mf6cc_ifset;
 
 		MFC6_UNLOCK();
 		return (0);
 	}
 
 	/*
 	 * Find the entry for which the upcall was made and update
 	 */
 	hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr,
 			mfccp->mf6cc_mcastgrp.sin6_addr);
 	for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) {
 		if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
 				       &mfccp->mf6cc_origin.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
 				       &mfccp->mf6cc_mcastgrp.sin6_addr) &&
 		    (rt->mf6c_stall != NULL)) {
 
 			/* More than one stalled entry is unexpected: log it. */
 			if (nstl++)
 				log(LOG_ERR,
 				    "add_m6fc: %s o %s g %s p %x dbx %p\n",
 				    "multiple kernel entries",
 				    ip6_sprintf(ip6bufo,
 					    &mfccp->mf6cc_origin.sin6_addr),
 				    ip6_sprintf(ip6bufg,
 					    &mfccp->mf6cc_mcastgrp.sin6_addr),
 				    mfccp->mf6cc_parent, rt->mf6c_stall);
 
 			MRT6_DLOG(DEBUG_MFC, "o %s g %s p %x dbg %p",
 			    ip6_sprintf(ip6bufo,
 			    &mfccp->mf6cc_origin.sin6_addr),
 			    ip6_sprintf(ip6bufg,
 				&mfccp->mf6cc_mcastgrp.sin6_addr),
 			    mfccp->mf6cc_parent, rt->mf6c_stall);
 
 			rt->mf6c_origin     = mfccp->mf6cc_origin;
 			rt->mf6c_mcastgrp   = mfccp->mf6cc_mcastgrp;
 			rt->mf6c_parent     = mfccp->mf6cc_parent;
 			rt->mf6c_ifset	    = mfccp->mf6cc_ifset;
 			/* initialize pkt counters per src-grp */
 			rt->mf6c_pkt_cnt    = 0;
 			rt->mf6c_byte_cnt   = 0;
 			rt->mf6c_wrong_if   = 0;
 
 			rt->mf6c_expire = 0;	/* Don't clean this guy up */
 			n6expire[hash]--;
 
 			/*
 			 * Forward, then free, every packet that was queued
 			 * on this entry while the upcall was outstanding.
 			 */
 			for (rte = rt->mf6c_stall; rte != NULL; ) {
 				struct rtdetq *n = rte->next;
 				ip6_mdq(rte->m, rte->ifp, rt);
 				m_freem(rte->m);
 #ifdef UPCALL_TIMING
 				collate(&(rte->t));
 #endif /* UPCALL_TIMING */
 				free(rte, M_MRTABLE6);
 				rte = n;
 			}
 			rt->mf6c_stall = NULL;
 		}
 	}
 
 	/*
 	 * It is possible that an entry is being inserted without an upcall
 	 */
 	if (nstl == 0) {
 		MRT6_DLOG(DEBUG_MFC, "no upcall h %lu o %s g %s p %x", hash,
 		    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
 		    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
 		    mfccp->mf6cc_parent);
 
 		for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
 
 			if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
 					       &mfccp->mf6cc_origin.sin6_addr)&&
 			    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
 					       &mfccp->mf6cc_mcastgrp.sin6_addr)) {
 
 				rt->mf6c_origin     = mfccp->mf6cc_origin;
 				rt->mf6c_mcastgrp   = mfccp->mf6cc_mcastgrp;
 				rt->mf6c_parent     = mfccp->mf6cc_parent;
 				rt->mf6c_ifset	    = mfccp->mf6cc_ifset;
 				/* initialize pkt counters per src-grp */
 				rt->mf6c_pkt_cnt    = 0;
 				rt->mf6c_byte_cnt   = 0;
 				rt->mf6c_wrong_if   = 0;
 
 				if (rt->mf6c_expire)
 					n6expire[hash]--;
 				rt->mf6c_expire	   = 0;
 
 				/*
 				 * Stop at the entry we refreshed.  Without
 				 * this the loop always ends with rt == NULL
 				 * and a duplicate entry gets allocated below.
 				 */
 				break;
 			}
 		}
 		if (rt == NULL) {
 			/* no upcall, so make a new entry */
 			rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
 						  M_NOWAIT);
 			if (rt == NULL) {
 				MFC6_UNLOCK();
 				return (ENOBUFS);
 			}
 
 			/* insert new entry at head of hash chain */
 			rt->mf6c_origin     = mfccp->mf6cc_origin;
 			rt->mf6c_mcastgrp   = mfccp->mf6cc_mcastgrp;
 			rt->mf6c_parent     = mfccp->mf6cc_parent;
 			rt->mf6c_ifset	    = mfccp->mf6cc_ifset;
 			/* initialize pkt counters per src-grp */
 			rt->mf6c_pkt_cnt    = 0;
 			rt->mf6c_byte_cnt   = 0;
 			rt->mf6c_wrong_if   = 0;
 			rt->mf6c_expire     = 0;
 			rt->mf6c_stall = NULL;
 
 			/* link into table */
 			rt->mf6c_next  = mf6ctable[hash];
 			mf6ctable[hash] = rt;
 		}
 	}
 
 	MFC6_UNLOCK();
 	return (0);
 }
 
 #ifdef UPCALL_TIMING
 /*
  * collect delay statistics on the upcalls
  */
 static void
 collate(struct timeval *t)
 {
 	u_long d;
 	struct timeval tp;
 	u_long delta;
 
 	GET_TIME(tp);
 
 	if (TV_LT(*t, tp))
 	{
 		TV_DELTA(tp, *t, delta);
 
 		d = delta >> 10;
 		if (d > UPCALL_MAX)
 			d = UPCALL_MAX;
 
 		++upcall_data[d];
 	}
 }
 #endif /* UPCALL_TIMING */
 
 /*
  * Delete an mfc entry
  */
 static int
 del_m6fc(struct mf6cctl *mfccp)
 {
 #ifdef MRT6DEBUG
 	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
 #endif
 	struct sockaddr_in6	origin;
 	struct sockaddr_in6	mcastgrp;
 	struct mf6c		*rt;
 	struct mf6c		**nptr;
 	u_long		hash;
 
 	origin = mfccp->mf6cc_origin;
 	mcastgrp = mfccp->mf6cc_mcastgrp;
 	hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr);
 
 	MRT6_DLOG(DEBUG_MFC, "orig %s mcastgrp %s",
 	    ip6_sprintf(ip6bufo, &origin.sin6_addr),
 	    ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
 
 	MFC6_LOCK();
 
 	nptr = &mf6ctable[hash];
 	while ((rt = *nptr) != NULL) {
 		if (IN6_ARE_ADDR_EQUAL(&origin.sin6_addr,
 				       &rt->mf6c_origin.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&mcastgrp.sin6_addr,
 				       &rt->mf6c_mcastgrp.sin6_addr) &&
 		    rt->mf6c_stall == NULL)
 			break;
 
 		nptr = &rt->mf6c_next;
 	}
 	if (rt == NULL) {
 		MFC6_UNLOCK();
 		return (EADDRNOTAVAIL);
 	}
 
 	*nptr = rt->mf6c_next;
 	free(rt, M_MRTABLE6);
 
 	MFC6_UNLOCK();
 
 	return (0);
 }
 
 static int
 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src)
 {
 
 	if (s) {
 		if (sbappendaddr(&s->so_rcv,
 				 (struct sockaddr *)src,
 				 mm, (struct mbuf *)0) != 0) {
 			sorwakeup(s);
 			return (0);
 		}
 	}
 	m_freem(mm);
 	return (-1);
 }
 
 /*
  * IPv6 multicast forwarding function. This function assumes that the packet
  * pointed to by "ip6" has arrived on (or is about to be sent to) the interface
  * pointed to by "ifp", and the packet is to be relayed to other networks
  * that have members of the packet's destination IPv6 multicast group.
  *
  * The packet is returned unscathed to the caller, unless it is
  * erroneous, in which case a non-zero return value tells the caller to
  * discard it.
  *
  * NOTE: this implementation assumes that m->m_pkthdr.rcvif is NULL iff
  * this function is called in the originating context (i.e., not when
  * forwarding a packet from other node).  ip6_output(), which is currently the
  * only function that calls this function is called in the originating context,
  * explicitly ensures this condition.  It is caller's responsibility to ensure
  * that if this function is called from somewhere else in the originating
  * context in the future.
  *
  * When a forwarding cache entry is found, the packet is handed to
  * ip6_mdq() and its result is returned.  Otherwise a copy of the packet
  * is queued on a (possibly new) incomplete cache entry; for a new entry
  * a MRT6MSG_NOCACHE upcall carrying a copy of the IPv6 header is sent to
  * the routing daemon socket.
  *
  * Returns 0 when the packet was forwarded, queued or deliberately
  * ignored, ENOBUFS on allocation failure or when the daemon socket
  * queue is full, and EINVAL when V_ip6_mrouter_ver is not a known
  * version.
  */
 int
 X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
 {
 	struct rtdetq *rte;
 	struct mbuf *mb0;
 	struct mf6c *rt;
 	struct mif6 *mifp;
 	struct mbuf *mm;
 	u_long hash;
 	mifi_t mifi;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #ifdef UPCALL_TIMING
 	struct timeval tp;
 
 	GET_TIME(tp);
 #endif /* UPCALL_TIMING */
 
 	MRT6_DLOG(DEBUG_FORWARD, "src %s, dst %s, ifindex %d",
 	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 	    ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp->if_index);
 
 	/*
 	 * Don't forward a packet with Hop limit of zero or one,
 	 * or a packet destined to a local-only group.
 	 */
 	if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) ||
 	    IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
 		return (0);
 	ip6->ip6_hlim--;
 
 	/*
 	 * Source address check: do not forward packets with unspecified
 	 * source. It was discussed in July 2000, on ipngwg mailing list.
 	 * This is rather more serious than unicast cases, because some
 	 * MLD packets can be sent with the unspecified source address
 	 * (although such packets must normally set 1 to the hop limit field).
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 		IP6STAT_INC(ip6s_cantforward);
 		/* Rate-limit the log message to one per log interval. */
 		if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
 			V_ip6_log_time = time_uptime;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "from %s to %s nxt %d received on %s\n",
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    ip6->ip6_nxt,
 			    if_name(m->m_pkthdr.rcvif));
 		}
 		return (0);
 	}
 
 	MFC6_LOCK();
 
 	/*
 	 * Determine forwarding mifs from the forwarding cache table
 	 */
 	MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);
 	MRT6STAT_INC(mrt6s_mfc_lookups);
 
 	/* Entry exists, so forward if necessary */
 	if (rt) {
 		MFC6_UNLOCK();
 		return (ip6_mdq(m, ifp, rt));
 	}
 
 	/*
 	 * If we don't have a route for packet's origin,
 	 * Make a copy of the packet & send message to routing daemon.
 	 */
 	MRT6STAT_INC(mrt6s_no_route);
 	MRT6_DLOG(DEBUG_FORWARD | DEBUG_MFC, "no rte s %s g %s",
 	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 	    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
 
 	/*
 	 * Allocate mbufs early so that we don't do extra work if we
 	 * are just going to fail anyway.
 	 */
 	rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6, M_NOWAIT);
 	if (rte == NULL) {
 		MFC6_UNLOCK();
 		return (ENOBUFS);
 	}
 	mb0 = m_copy(m, 0, M_COPYALL);
 	/*
 	 * Pullup packet header if needed before storing it,
 	 * as other references may modify it in the meantime.
 	 */
 	if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
 		mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
 	if (mb0 == NULL) {
 		free(rte, M_MRTABLE6);
 		MFC6_UNLOCK();
 		return (ENOBUFS);
 	}
 
 	/* is there an upcall waiting for this packet? */
 	hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
 	for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
 		if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
 		    &rt->mf6c_origin.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
 		    &rt->mf6c_mcastgrp.sin6_addr) && (rt->mf6c_stall != NULL))
 			break;
 	}
 
 	if (rt == NULL) {
 		struct mrt6msg *im;
 #ifdef MRT6_OINIT
 		struct omrt6msg *oim;
 #endif
 		/* no upcall, so make a new entry */
 		rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT);
 		if (rt == NULL) {
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
 		/*
 		 * Make a copy of the header to send to the user
 		 * level process
 		 */
 		mm = m_copy(mb0, 0, sizeof(struct ip6_hdr));
 		if (mm == NULL) {
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			free(rt, M_MRTABLE6);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
 
 		/*
 		 * Send message to routing daemon
 		 */
 		sin6.sin6_addr = ip6->ip6_src;
 		im = NULL;
 #ifdef MRT6_OINIT
 		oim = NULL;
 #endif
 		switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 		case MRT6_OINIT:
 			oim = mtod(mm, struct omrt6msg *);
 			oim->im6_msgtype = MRT6MSG_NOCACHE;
 			oim->im6_mbz = 0;
 			break;
 #endif
 		case MRT6_INIT:
 			im = mtod(mm, struct mrt6msg *);
 			im->im6_msgtype = MRT6MSG_NOCACHE;
 			im->im6_mbz = 0;
 			break;
 		default:
 			/*
 			 * Unknown daemon API version: release everything
 			 * allocated so far, including the header copy that
 			 * would otherwise be leaked.
 			 */
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			m_freem(mm);
 			free(rt, M_MRTABLE6);
 			MFC6_UNLOCK();
 			return (EINVAL);
 		}
 
 		MRT6_DLOG(DEBUG_FORWARD, "getting the iif info in the kernel");
 		/* Find the mif index of the receiving interface. */
 		for (mifp = mif6table, mifi = 0;
 		    mifi < nummifs && mifp->m6_ifp != ifp; mifp++, mifi++)
 				;
 
 		switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 		case MRT6_OINIT:
 			oim->im6_mif = mifi;
 			break;
 #endif
 		case MRT6_INIT:
 			im->im6_mif = mifi;
 			break;
 		}
 
 		/* socket_send() frees mm itself when it fails. */
 		if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
 			log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
 			    "socket queue full\n");
 			MRT6STAT_INC(mrt6s_upq_sockfull);
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			free(rt, M_MRTABLE6);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
 
 		MRT6STAT_INC(mrt6s_upcalls);
 
 		/* insert new entry at head of hash chain */
 		bzero(rt, sizeof(*rt));
 		rt->mf6c_origin.sin6_family = AF_INET6;
 		rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
 		rt->mf6c_origin.sin6_addr = ip6->ip6_src;
 		rt->mf6c_mcastgrp.sin6_family = AF_INET6;
 		rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
 		rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
 		rt->mf6c_expire = UPCALL_EXPIRE;
 		n6expire[hash]++;
 		rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;
 
 		/* link into table */
 		rt->mf6c_next  = mf6ctable[hash];
 		mf6ctable[hash] = rt;
 		/* Add this entry to the end of the queue */
 		rt->mf6c_stall = rte;
 	} else {
 		/* determine if q has overflowed */
 		struct rtdetq **p;
 		int npkts = 0;
 
 		for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
 			if (++npkts > MAX_UPQ6) {
 				MRT6STAT_INC(mrt6s_upq_ovflw);
 				free(rte, M_MRTABLE6);
 				m_freem(mb0);
 				MFC6_UNLOCK();
 				return (0);
 			}
 
 		/* Add this entry to the end of the queue */
 		*p = rte;
 	}
 
 	/* Fill in the queue element now that it is linked. */
 	rte->next = NULL;
 	rte->m = mb0;
 	rte->ifp = ifp;
 #ifdef UPCALL_TIMING
 	rte->t = tp;
 #endif /* UPCALL_TIMING */
 
 	MFC6_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Periodic callout that discards incomplete cache entries whose upcall
  * was never answered.  For every hash bucket with a non-zero n6expire
  * count, each entry that has queued packets and a running expiry
  * counter has that counter decremented; when it reaches zero the queued
  * packets and the entry itself are freed.  The callout re-arms itself
  * with EXPIRE_TIMEOUT before returning.
  */
 static void
 expire_upcalls(void *unused)
 {
 #ifdef MRT6DEBUG
 	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
 #endif
 	struct mf6c *mfc, **nptr;
 	struct rtdetq *rte, *next;
 	u_long i;
 
 	MFC6_LOCK();
 	for (i = 0; i < MF6CTBLSIZ; i++) {
 		/* Nothing pending in this bucket. */
 		if (n6expire[i] == 0)
 			continue;
 
 		for (nptr = &mf6ctable[i]; (mfc = *nptr) != NULL; ) {
 			rte = mfc->mf6c_stall;
 
 			/*
 			 * Keep the entry when it is a real cache entry (no
 			 * queued packets), when it is marked not to expire,
 			 * or when its counter has not yet run down to zero.
 			 */
 			if (rte == NULL ||
 			    mfc->mf6c_expire == 0 ||
 			    --mfc->mf6c_expire != 0) {
 				nptr = &mfc->mf6c_next;
 				continue;
 			}
 
 			MRT6_DLOG(DEBUG_EXPIRE, "expiring (%s %s)",
 			    ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
 			    ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
 
 			/* Drop every queued packet and its queue element. */
 			for (; rte != NULL; rte = next) {
 				next = rte->next;
 				m_freem(rte->m);
 				free(rte, M_MRTABLE6);
 			}
 			MRT6STAT_INC(mrt6s_cache_cleanups);
 			n6expire[i]--;
 
 			/* Unlink the entry; nptr now names its successor. */
 			*nptr = mfc->mf6c_next;
 			free(mfc, M_MRTABLE6);
 		}
 	}
 	MFC6_UNLOCK();
 	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
 	    expire_upcalls, NULL);
 }
 
 /*
  * Packet forwarding routine once entry in the cache is made.
  *
  * m is the packet, ifp the interface it arrived on (or is being sent
  * on), and rt the forwarding cache entry for its source and group.
  *
  * If ifp is not the interface of the entry's parent mif, the packet is
  * not forwarded; the wrong-interface counters are bumped and, when
  * V_pim6 is set and the packet is not flagged M_LOOP, a
  * MRT6MSG_WRONGMIF upcall is sent to the routing daemon.  Otherwise the
  * per-mif and per-entry counters are updated and a copy is sent on each
  * mif set in rt->mf6c_ifset, skipping those that would cross a scope
  * zone boundary.
  *
  * Returns 0 on success, ENOBUFS or EINVAL from the upcall path, or the
  * error from in6_setscope() on the incoming interface.
  */
 static int
 ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	mifi_t mifi, iif;
 	struct mif6 *mifp;
 	int plen = m->m_pkthdr.len;
 	struct in6_addr src0, dst0; /* copies for local work */
 	u_int32_t iszone, idzone, oszone, odzone;
 	int error = 0;
 
 /*
  * Macro to send packet on mif.  Since RSVP packets don't get counted on
  * input, they shouldn't get counted on output, so statistics keeping is
  * separate.
  */
 
 #define MC6_SEND(ip6, mifp, m) do {				\
 	if ((mifp)->m6_flags & MIFF_REGISTER)			\
 		register_send((ip6), (mifp), (m));		\
 	else							\
 		phyint_send((ip6), (mifp), (m));		\
 } while (/*CONSTCOND*/ 0)
 
 	/*
 	 * Don't forward if it didn't arrive from the parent mif
 	 * for its origin.
 	 */
 	mifi = rt->mf6c_parent;
 	if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) {
 		/* came in the wrong interface */
 		/*
 		 * mifi may be out of range, or name a mif without an
 		 * interface, on this path; only dereference the table
 		 * entry for the log message when it is valid.
 		 */
 		MRT6_DLOG(DEBUG_FORWARD,
 		    "wrong if: ifid %d mifi %d mififid %x", ifp->if_index,
 		    mifi,
 		    (mifi < nummifs && mif6table[mifi].m6_ifp != NULL) ?
 		    mif6table[mifi].m6_ifp->if_index : 0);
 		MRT6STAT_INC(mrt6s_wrong_if);
 		rt->mf6c_wrong_if++;
 		/*
 		 * If we are doing PIM processing, and we are forwarding
 		 * packets on this interface, send a message to the
 		 * routing daemon.
 		 */
 		/* have to make sure this is a valid mif */
 		if (mifi < nummifs && mif6table[mifi].m6_ifp)
 			if (V_pim6 && (m->m_flags & M_LOOP) == 0) {
 				/*
 				 * Check the M_LOOP flag to avoid an
 				 * unnecessary PIM assert.
 				 * XXX: M_LOOP is an ad-hoc hack...
 				 */
 				static struct sockaddr_in6 sin6 =
 				{ sizeof(sin6), AF_INET6 };
 
 				struct mbuf *mm;
 				struct mrt6msg *im;
 #ifdef MRT6_OINIT
 				struct omrt6msg *oim;
 #endif
 
 				/* The upcall carries a copy of the header. */
 				mm = m_copy(m, 0, sizeof(struct ip6_hdr));
 				if (mm &&
 				    (!M_WRITABLE(mm) ||
 				     mm->m_len < sizeof(struct ip6_hdr)))
 					mm = m_pullup(mm, sizeof(struct ip6_hdr));
 				if (mm == NULL)
 					return (ENOBUFS);
 
 #ifdef MRT6_OINIT
 				oim = NULL;
 #endif
 				im = NULL;
 				switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 				case MRT6_OINIT:
 					oim = mtod(mm, struct omrt6msg *);
 					oim->im6_msgtype = MRT6MSG_WRONGMIF;
 					oim->im6_mbz = 0;
 					break;
 #endif
 				case MRT6_INIT:
 					im = mtod(mm, struct mrt6msg *);
 					im->im6_msgtype = MRT6MSG_WRONGMIF;
 					im->im6_mbz = 0;
 					break;
 				default:
 					m_freem(mm);
 					return (EINVAL);
 				}
 
 				/* Find the mif the packet really came in on. */
 				for (mifp = mif6table, iif = 0;
 				     iif < nummifs && mifp &&
 					     mifp->m6_ifp != ifp;
 				     mifp++, iif++)
 					;
 
 				switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 				case MRT6_OINIT:
 					oim->im6_mif = iif;
 					sin6.sin6_addr = oim->im6_src;
 					break;
 #endif
 				case MRT6_INIT:
 					im->im6_mif = iif;
 					sin6.sin6_addr = im->im6_src;
 					break;
 				}
 
 				MRT6STAT_INC(mrt6s_upcalls);
 
 				if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
 					MRT6_DLOG(DEBUG_ANY,
 					    "ip6_mrouter socket queue full");
 					MRT6STAT_INC(mrt6s_upq_sockfull);
 					return (ENOBUFS);
 				}	/* if socket Q full */
 			}		/* if PIM */
 		return (0);
 	}			/* if wrong iif */
 
 	/* If I sourced this packet, it counts as output, else it was input. */
 	if (m->m_pkthdr.rcvif == NULL) {
 		/* XXX: is rcvif really NULL when output?? */
 		mif6table[mifi].m6_pkt_out++;
 		mif6table[mifi].m6_bytes_out += plen;
 	} else {
 		mif6table[mifi].m6_pkt_in++;
 		mif6table[mifi].m6_bytes_in += plen;
 	}
 	rt->mf6c_pkt_cnt++;
 	rt->mf6c_byte_cnt += plen;
 
 	/*
 	 * For each mif, forward a copy of the packet if there are group
 	 * members downstream on the interface.
 	 */
 	src0 = ip6->ip6_src;
 	dst0 = ip6->ip6_dst;
 	if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 ||
 	    (error = in6_setscope(&dst0, ifp, &idzone)) != 0) {
 		IP6STAT_INC(ip6s_badscope);
 		return (error);
 	}
 	for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) {
 		if (IF_ISSET(mifi, &rt->mf6c_ifset)) {
 			/*
 			 * check if the outgoing packet is going to break
 			 * a scope boundary.
 			 * XXX For packets through PIM register tunnel
 			 * interface, we believe a routing daemon.
 			 */
 			if (!(mif6table[rt->mf6c_parent].m6_flags &
 			      MIFF_REGISTER) &&
 			    !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
 				if (in6_setscope(&src0, mif6table[mifi].m6_ifp,
 				    &oszone) ||
 				    in6_setscope(&dst0, mif6table[mifi].m6_ifp,
 				    &odzone) ||
 				    iszone != oszone ||
 				    idzone != odzone) {
 					IP6STAT_INC(ip6s_badscope);
 					continue;
 				}
 			}
 
 			mifp->m6_pkt_out++;
 			mifp->m6_bytes_out += plen;
 			MC6_SEND(ip6, mifp, m);
 		}
 	}
 	return (0);
 }
 
 /*
  * Send a copy of packet m out of the interface behind mifp.
  *
  * The packet is duplicated first, and the copy is pulled up when its
  * header is not writable or not contiguous; if that fails the function
  * returns silently.  Locally originated packets (rcvif == NULL) go
  * through ip6_output() with multicast options naming the outgoing
  * interface.  Forwarded packets are optionally looped back when
  * in6_mcast_loop is set, then either handed to the interface's
  * if_output routine, answered with an ICMPv6 "packet too big" error
  * when V_ip6_mcast_pmtu is set, or discarded.
  */
 static void
 phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
 {
 #ifdef MRT6DEBUG
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #endif
 	struct mbuf *mb_copy;
 	struct ifnet *ifp = mifp->m6_ifp;
 	int error = 0;
 	u_long linkmtu;
 
 	/*
 	 * Make a new reference to the packet; make sure that
 	 * the IPv6 header is actually copied, not just referenced,
 	 * so that ip6_output() only scribbles on the copy.
 	 */
 	mb_copy = m_copy(m, 0, M_COPYALL);
 	if (mb_copy &&
 	    (!M_WRITABLE(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
 		mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr));
 	if (mb_copy == NULL) {
 		return;
 	}
 	/* set MCAST flag to the outgoing packet */
 	mb_copy->m_flags |= M_MCAST;
 
 	/*
 	 * If we sourced the packet, call ip6_output since we may divide
 	 * the packet into fragments when the packet is too big for the
 	 * outgoing interface.
 	 * Otherwise, we can simply send the packet to the interface
 	 * sending queue.
 	 */
 	if (m->m_pkthdr.rcvif == NULL) {
 		struct ip6_moptions im6o;
 
 		im6o.im6o_multicast_ifp = ifp;
 		/* XXX: ip6_output will override ip6->ip6_hlim */
 		im6o.im6o_multicast_hlim = ip6->ip6_hlim;
 		im6o.im6o_multicast_loop = 1;
 		error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o,
 		    NULL, NULL);
 
 		MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
 		    (uint16_t)(mifp - mif6table), error);
 		return;
 	}
 
 	/*
 	 * If configured to loop back multicasts by default,
 	 * loop back a copy now.
 	 */
 	if (in6_mcast_loop) {
 		struct sockaddr_in6 dst6;
 
 		bzero(&dst6, sizeof(dst6));
 		dst6.sin6_len = sizeof(struct sockaddr_in6);
 		dst6.sin6_family = AF_INET6;
 		dst6.sin6_addr = ip6->ip6_dst;
 		ip6_mloopback(ifp, m, &dst6);
 	}
 
 	/*
 	 * Put the packet into the sending queue of the outgoing interface
 	 * if it would fit in the MTU of the interface.
 	 */
 	linkmtu = IN6_LINKMTU(ifp);
 	/* A link MTU below IPV6_MMTU is not used to reject the packet. */
 	if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) {
 		struct sockaddr_in6 dst6;
 
 		bzero(&dst6, sizeof(dst6));
 		dst6.sin6_len = sizeof(struct sockaddr_in6);
 		dst6.sin6_family = AF_INET6;
 		dst6.sin6_addr = ip6->ip6_dst;
 
 		IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
 		/*
 		 * We just call if_output instead of nd6_output here, since
 		 * we need no ND for a multicast forwarded packet...right?
 		 */
 		/*
 		 * NOTE(review): the flags are cleared on the caller's
 		 * packet m, while mb_copy is the mbuf that is actually
 		 * transmitted below -- confirm which one was intended.
 		 */
 		m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 		error = (*ifp->if_output)(ifp, mb_copy,
 		    (struct sockaddr *)&dst6, NULL);
 		MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
 		    (uint16_t)(mifp - mif6table), error);
 	} else {
 		/*
 		 * pMTU discovery is intentionally disabled by default, since
 		 * various router may notify pMTU in multicast, which can be
 		 * a DDoS to a router
 		 */
 		if (V_ip6_mcast_pmtu)
 			icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
 		else {
 			MRT6_DLOG(DEBUG_XMIT, " packet too big on %s o %s "
 			    "g %s size %d (discarded)", if_name(ifp),
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    mb_copy->m_pkthdr.len);
 			m_freem(mb_copy); /* simply discard the packet */
 		}
 	}
 }
 
 static int
 register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
 {
 #ifdef MRT6DEBUG
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #endif
 	struct mbuf *mm;
 	int i, len = m->m_pkthdr.len;
 	static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
 	struct mrt6msg *im6;
 
 	MRT6_DLOG(DEBUG_ANY, "src %s dst %s",
 	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 	    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
 	PIM6STAT_INC(pim6s_snd_registers);
 
 	/* Make a copy of the packet to send to the user level process. */
 	mm = m_gethdr(M_NOWAIT, MT_DATA);
 	if (mm == NULL)
 		return (ENOBUFS);
 	mm->m_data += max_linkhdr;
 	mm->m_len = sizeof(struct ip6_hdr);
 
 	if ((mm->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
 		m_freem(mm);
 		return (ENOBUFS);
 	}
 	i = MHLEN - M_LEADINGSPACE(mm);
 	if (i > len)
 		i = len;
 	mm = m_pullup(mm, i);
 	if (mm == NULL)
 		return (ENOBUFS);
 /* TODO: check it! */
 	mm->m_pkthdr.len = len + sizeof(struct ip6_hdr);
 
 	/*
 	 * Send message to routing daemon
 	 */
 	sin6.sin6_addr = ip6->ip6_src;
 
 	im6 = mtod(mm, struct mrt6msg *);
 	im6->im6_msgtype      = MRT6MSG_WHOLEPKT;
 	im6->im6_mbz          = 0;
 
 	im6->im6_mif = mif - mif6table;
 
 	/* iif info is not given for reg. encap.n */
 	MRT6STAT_INC(mrt6s_upcalls);
 
 	if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
 		MRT6_DLOG(DEBUG_ANY, "ip6_mrouter socket queue full");
 		MRT6STAT_INC(mrt6s_upq_sockfull);
 		return (ENOBUFS);
 	}
 	return (0);
 }
 
 /*
  * pim6_encapcheck() is called by the encap6_input() path at runtime to
  * determine if a packet is for PIM; allowing PIM to be dynamically loaded
  * into the kernel.
  *
  * Only proto is examined.  Returns 64 to claim the datagram when proto is
  * IPPROTO_PIM, and 0 to reject it otherwise.
  */
 static int
 pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 
 #ifdef DIAGNOSTIC
 	KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
 #endif
 	return ((proto == IPPROTO_PIM) ? 64 : 0);
 }
 
 /*
  * PIM sparse mode hook
  * Receives the pim control messages, and passes them up to the listening
  * socket, using rip6_input.
  * The only message processed is the REGISTER pim message; the pim header
  * is stripped off, and the inner packet is passed to register_mforward.
  *
  * mp and offp point at the packet and at the offset of the PIM header
  * within it; proto is passed through to rip6_input().  Every path that
  * frees the mbuf itself returns IPPROTO_DONE, and so does the normal
  * path after rip6_input() has been given the packet.
  */
 int
 pim6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct pim *pim; /* pointer to a pim struct */
 	struct ip6_hdr *ip6;
 	int pimlen;
 	struct mbuf *m = *mp;
 	int minlen;
 	int off = *offp;
 
 	PIM6STAT_INC(pim6s_rcv_total);
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	pimlen = m->m_pkthdr.len - *offp;
 
 	/*
 	 * Validate lengths
 	 */
 	if (pimlen < PIM_MINLEN) {
 		PIM6STAT_INC(pim6s_rcv_tooshort);
 		MRT6_DLOG(DEBUG_PIM, "PIM packet too short");
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/*
 	 * if the packet is at least as big as a REGISTER, go ahead
 	 * and grab the PIM REGISTER header size, to avoid another
 	 * possible m_pullup() later.
 	 *
 	 * PIM_MINLEN       == pimhdr + u_int32 == 8
 	 * PIM6_REG_MINLEN   == pimhdr + reghdr + eip6hdr == 4 + 4 + 40
 	 */
 	minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN;
 
 	/*
 	 * Make sure that the IP6 and PIM headers in contiguous memory, and
 	 * possibly the PIM REGISTER header
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE);
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* adjust mbuf to point to the PIM header */
 	pim = (struct pim *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
 	if (pim == NULL) {
 		PIM6STAT_INC(pim6s_rcv_tooshort);
 		return (IPPROTO_DONE);
 	}
 #endif
 
 #define PIM6_CHECKSUM
 #ifdef PIM6_CHECKSUM
 	{
 		int cksumlen;
 
 		/*
 		 * Validate checksum.
 		 * If PIM REGISTER, exclude the data packet
 		 */
 		if (pim->pim_type == PIM_REGISTER)
 			cksumlen = PIM_MINLEN;
 		else
 			cksumlen = pimlen;
 
 		if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) {
 			PIM6STAT_INC(pim6s_rcv_badsum);
 			MRT6_DLOG(DEBUG_PIM, "invalid checksum");
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 #endif /* PIM6_CHECKSUM */
 
 	/* PIM version check */
 	if (pim->pim_ver != PIM_VERSION) {
 		PIM6STAT_INC(pim6s_rcv_badversion);
 		MRT6_DLOG(DEBUG_ANY | DEBUG_ERR,
 		    "incorrect version %d, expecting %d",
 		    pim->pim_ver, PIM_VERSION);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	if (pim->pim_type == PIM_REGISTER) {
 		/*
 		 * since this is a REGISTER, we'll make a copy of the register
 		 * headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the
 		 * routing daemon.
 		 */
 		static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 };
 
 		struct mbuf *mcp;
 		struct ip6_hdr *eip6;
 		u_int32_t *reghdr;
 #ifdef MRT6DEBUG
 		char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #endif
 
 		PIM6STAT_INC(pim6s_rcv_registers);
 
 		if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) {
 			MRT6_DLOG(DEBUG_PIM, "register mif not set: %d",
 			    reg_mif_num);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		/* The 32-bit register header follows the PIM header. */
 		reghdr = (u_int32_t *)(pim + 1);
 
 		/* Null registers carry no data: pass them straight up. */
 		if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
 			goto pim6_input_to_daemon;
 
 		/*
 		 * Validate length
 		 */
 		if (pimlen < PIM6_REG_MINLEN) {
 			PIM6STAT_INC(pim6s_rcv_tooshort);
 			PIM6STAT_INC(pim6s_rcv_badregisters);
 			MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "register packet "
 			    "size too small %d from %s",
 			    pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		eip6 = (struct ip6_hdr *) (reghdr + 1);
 		MRT6_DLOG(DEBUG_PIM, "eip6: %s -> %s, eip6 plen %d",
 		    ip6_sprintf(ip6bufs, &eip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &eip6->ip6_dst),
 		    ntohs(eip6->ip6_plen));
 
 		/* verify the version number of the inner packet */
 		if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
 			PIM6STAT_INC(pim6s_rcv_badregisters);
 			MRT6_DLOG(DEBUG_ANY, "invalid IP version (%d) "
 			    "of the inner packet",
 			    (eip6->ip6_vfc & IPV6_VERSION));
 			m_freem(m);
 			/*
 			 * The mbuf is gone, so report IPPROTO_DONE like
 			 * every other drop path; any other value would let
 			 * the caller keep processing the freed packet.
 			 */
 			return (IPPROTO_DONE);
 		}
 
 		/* verify the inner packet is destined to a mcast group */
 		if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) {
 			PIM6STAT_INC(pim6s_rcv_badregisters);
 			MRT6_DLOG(DEBUG_PIM, "inner packet of register "
 			    "is not multicast %s",
 			    ip6_sprintf(ip6bufd, &eip6->ip6_dst));
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		/*
 		 * make a copy of the whole header to pass to the daemon later.
 		 */
 		mcp = m_copy(m, 0, off + PIM6_REG_MINLEN);
 		if (mcp == NULL) {
 			MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "pim register: "
 			    "could not copy register head");
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		/*
 		 * forward the inner ip6 packet; point m_data at the inner ip6.
 		 */
 		m_adj(m, off + PIM_MINLEN);
 		MRT6_DLOG(DEBUG_PIM, "forwarding decapsulated register: "
 		    "src %s, dst %s, mif %d",
 		    ip6_sprintf(ip6bufs, &eip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &eip6->ip6_dst), reg_mif_num);
 
 		/* The result of the loopback injection is not acted upon. */
 		(void)if_simloop(mif6table[reg_mif_num].m6_ifp, m,
 				dst.sin6_family, 0);
 
 		/* prepare the register head to send to the mrouting daemon */
 		m = mcp;
 	}
 
 	/*
 	 * Pass the PIM message up to the daemon; if it is a register message
 	 * pass the 'head' only up to the daemon. This includes the
 	 * encapsulator ip6 header, pim header, register header and the
 	 * encapsulated ip6 header.
 	 */
   pim6_input_to_daemon:
 	rip6_input(&m, offp, proto);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Module event handler.
  *
  * MOD_LOAD initialises the three locks, attaches the PIM encapsulation
  * handler and installs the X_* entry points into the global hooks; it
  * returns EINVAL, after destroying the locks again, if the encap attach
  * fails.  MOD_UNLOAD returns EINVAL while V_ip6_mrouter is set;
  * otherwise it detaches the encap handler, shuts the router down,
  * clears the hooks and destroys the locks.  Any other event yields
  * EOPNOTSUPP.
  */
 static int
 ip6_mroute_modevent(module_t mod, int type, void *unused)
 {
 	int error = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		MROUTER6_LOCK_INIT();
 		MFC6_LOCK_INIT();
 		MIF6_LOCK_INIT();
 
 		pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
 		    pim6_encapcheck,
 		    (const struct protosw *)&in6_pim_protosw, NULL);
 		if (pim6_encap_cookie == NULL) {
 			printf("ip6_mroute: unable to attach pim6 encap\n");
 			/* Undo the lock setup in reverse order. */
 			MIF6_LOCK_DESTROY();
 			MFC6_LOCK_DESTROY();
 			MROUTER6_LOCK_DESTROY();
 			error = EINVAL;
 			break;
 		}
 
 		/* Publish the entry points. */
 		ip6_mforward = X_ip6_mforward;
 		ip6_mrouter_done = X_ip6_mrouter_done;
 		ip6_mrouter_get = X_ip6_mrouter_get;
 		ip6_mrouter_set = X_ip6_mrouter_set;
 		mrt6_ioctl = X_mrt6_ioctl;
 		break;
 
 	case MOD_UNLOAD:
 		/* Refuse to unload while V_ip6_mrouter is still set. */
 		if (V_ip6_mrouter != NULL) {
 			error = EINVAL;
 			break;
 		}
 
 		if (pim6_encap_cookie) {
 			encap_detach(pim6_encap_cookie);
 			pim6_encap_cookie = NULL;
 		}
 		X_ip6_mrouter_done();
 
 		/* Withdraw the entry points. */
 		ip6_mforward = NULL;
 		ip6_mrouter_done = NULL;
 		ip6_mrouter_get = NULL;
 		ip6_mrouter_set = NULL;
 		mrt6_ioctl = NULL;
 
 		MIF6_LOCK_DESTROY();
 		MFC6_LOCK_DESTROY();
 		MROUTER6_LOCK_DESTROY();
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Module descriptor: the module name, its event handler, and a third
  * field that is set to 0 here.
  */
 static moduledata_t ip6_mroutemod = {
 	"ip6_mroute",
 	ip6_mroute_modevent,
 	0
 };
 
 /* Declare the module with subsystem SI_SUB_PSEUDO and order SI_ORDER_ANY. */
 DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: projects/ifnet/sys/netinet6/ip6_mroute.h
===================================================================
--- projects/ifnet/sys/netinet6/ip6_mroute.h	(revision 281172)
+++ projects/ifnet/sys/netinet6/ip6_mroute.h	(revision 281173)
@@ -1,265 +1,279 @@
 /*-
  * Copyright (C) 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_mroute.h,v 1.19 2001/06/14 06:12:55 suz Exp $
  * $FreeBSD$
  */
 
 /*	BSDI ip_mroute.h,v 2.5 1996/10/11 16:01:48 pjd Exp	*/
 
 /*
  * Definitions for IP multicast forwarding.
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Ajit Thyagarajan, PARC, August 1993.
  * Modified by Ajit Thyagarajan, PARC, August 1994.
  * Modified by Ahmed Helmy, USC, September 1996.
  *
  * MROUTING Revision: 1.2
  */
 
 #ifndef _NETINET6_IP6_MROUTE_H_
 #define _NETINET6_IP6_MROUTE_H_
 
 /*
  * Multicast Routing set/getsockopt commands.
  * MRT6_OINIT is only defined when _KERNEL is defined.  The value 106 is
  * not assigned in this list.
  */
 #ifdef _KERNEL
 #define MRT6_OINIT		100	/* initialize forwarder (omrt6msg) */
 #endif
 #define MRT6_DONE		101	/* shut down forwarder */
 #define MRT6_ADD_MIF		102	/* add multicast interface */
 #define MRT6_DEL_MIF		103	/* delete multicast interface */
 #define MRT6_ADD_MFC		104	/* insert forwarding cache entry */
 #define MRT6_DEL_MFC		105	/* delete forwarding cache entry */
 #define MRT6_PIM                107     /* enable pim code */
 #define MRT6_INIT		108	/* initialize forwarder (mrt6msg) */
 
 /*
  * GET_TIME(t): store the current time into 't'.  The implementation is
  * picked at compile time: microtime() when BSD >= 199103, uniqtime()
  * when 'sun' is defined, otherwise a plain copy of the variable 'time'.
  */
 #if BSD >= 199103
 #define GET_TIME(t)	microtime(&t)
 #elif defined(sun)
 #define GET_TIME(t)	uniqtime(&t)
 #else
 #define GET_TIME(t)	((t) = time)
 #endif
 
 /*
  * Types and macros for handling bitmaps with one bit per multicast interface.
  */
 typedef u_short mifi_t;		/* type of a mif index */
 #define MAXMIFS		64
 
 /* Number of interfaces representable in one mrt6_ifset (overridable). */
 #ifndef	IF_SETSIZE
 #define	IF_SETSIZE	256
 #endif
 
 typedef	uint32_t	mrt6_ifmask;
 #define	NIFBITS	(sizeof(mrt6_ifmask) * NBBY)	/* bits per mask */
 
 #ifndef howmany
 #define	howmany(x, y)	(((x) + ((y) - 1)) / (y))
 #endif
 
 /* Bitmap with one bit per interface, stored in mrt6_ifmask words. */
 typedef	struct mrt6_ifset {
 	mrt6_ifmask	ifs_bits[howmany(IF_SETSIZE, NIFBITS)];
 } mrt6_ifset;
 
 /*
  * Set, clear and test bit 'n' of the set pointed to by 'p'; copy or
  * zero a whole set.  'n' is not range checked and must be below
  * IF_SETSIZE.  The single-bit mask is built from a mrt6_ifmask value
  * rather than a plain int so that shifting into bit 31 is well defined.
  */
 #define	IF_SET(n, p)	((p)->ifs_bits[(n)/NIFBITS] |= ((mrt6_ifmask)1 << ((n) % NIFBITS)))
 #define	IF_CLR(n, p)	((p)->ifs_bits[(n)/NIFBITS] &= ~((mrt6_ifmask)1 << ((n) % NIFBITS)))
 #define	IF_ISSET(n, p)	((p)->ifs_bits[(n)/NIFBITS] & ((mrt6_ifmask)1 << ((n) % NIFBITS)))
 #define	IF_COPY(f, t)	bcopy(f, t, sizeof(*(f)))
 #define	IF_ZERO(p)	bzero(p, sizeof(*(p)))
 
 /*
  * Argument structure for MRT6_ADD_MIF.
  */
 struct mif6ctl {
 	mifi_t	    mif6c_mifi;		/* the index of the mif to be added  */
 	u_char	    mif6c_flags;	/* MIFF_ flags defined below         */
 	u_short	    mif6c_pifi;		/* the index of the physical IF */
 };
 
 #define	MIFF_REGISTER	0x1	/* mif represents a register end-point */
 
 /*
  * Argument structure for MRT6_ADD_MFC and MRT6_DEL_MFC
  */
 struct mf6cctl {
 	struct sockaddr_in6 mf6cc_origin;	/* IPv6 origin of mcasts */
 	struct sockaddr_in6 mf6cc_mcastgrp; /* multicast group associated */
 	mifi_t		mf6cc_parent;	/* incoming ifindex */
 	struct mrt6_ifset mf6cc_ifset;	/* set of forwarding ifs */
 };
 
 /*
  * The kernel's multicast routing statistics.
  */
 struct mrt6stat {
 	uint64_t mrt6s_mfc_lookups;	/* # forw. cache hash table hits   */
 	uint64_t mrt6s_mfc_misses;	/* # forw. cache hash table misses */
 	uint64_t mrt6s_upcalls;		/* # calls to multicast routing daemon */
 	uint64_t mrt6s_no_route;	/* no route for packet's origin    */
 	uint64_t mrt6s_bad_tunnel;	/* malformed tunnel options        */
 	uint64_t mrt6s_cant_tunnel;	/* no room for tunnel options      */
 	uint64_t mrt6s_wrong_if;	/* arrived on wrong interface	   */
 	uint64_t mrt6s_upq_ovflw;	/* upcall Q overflow		   */
 	uint64_t mrt6s_cache_cleanups;	/* # entries with no upcalls	   */
 	uint64_t mrt6s_drop_sel;	/* pkts dropped selectively        */
 	uint64_t mrt6s_q_overflow;	/* pkts dropped - Q overflow       */
 	uint64_t mrt6s_pkt2large;	/* pkts dropped - size > BKT SIZE  */
 	uint64_t mrt6s_upq_sockfull;	/* upcalls dropped - socket full   */
 };
 
 #ifdef MRT6_OINIT
 /*
  * Struct used to communicate from kernel to multicast router
  * note the convenient similarity to an IPv6 header.
  * XXX old version, superseded by mrt6msg.
  *
  * Only declared when MRT6_OINIT is defined, which this header does
  * only under _KERNEL.
  */
 struct omrt6msg {
 	u_long	    unused1;
 	u_char	    im6_msgtype;		/* what type of message	    */
 	u_char	    im6_mbz;			/* must be zero		    */
 	u_char	    im6_mif;			/* mif rec'd on		    */
 	u_char	    unused2;
 	struct in6_addr  im6_src, im6_dst;
 };
 #endif
 
 /*
  * Structure used to communicate from kernel to multicast router.
  * We'll overlay the structure onto an MLD header (not an IPv6 header
  * like igmpmsg{} used for IPv4 implementation). This is because this
  * structure will be passed via an IPv6 raw socket, on which an application
  * will only receive the payload i.e. the data after the IPv6 header and all
  * the extension headers. (see Section 3 of RFC3542)
  */
 struct mrt6msg {
 #define MRT6MSG_NOCACHE		1
 #define MRT6MSG_WRONGMIF	2
 #define MRT6MSG_WHOLEPKT	3		/* used for user level encap*/
 	u_char	    im6_mbz;			/* must be zero		    */
 	u_char	    im6_msgtype;		/* what type of message	    */
 	u_int16_t   im6_mif;			/* mif rec'd on		    */
 	u_int32_t   im6_pad;			/* padding for 64bit arch   */
 	struct in6_addr  im6_src, im6_dst;
 };
 
 /*
  * Argument structure used by multicast routing daemon to get src-grp
  * packet counts
  */
 struct sioc_sg_req6 {
 	struct sockaddr_in6 src;
 	struct sockaddr_in6 grp;
 	u_quad_t pktcnt;
 	u_quad_t bytecnt;
 	u_quad_t wrong_if;
 };
 
 /*
  * Argument structure used by mrouted to get mif pkt counts
  */
 struct sioc_mif_req6 {
 	mifi_t mifi;		/* mif number				*/
 	u_quad_t icount;	/* Input packet count on mif		*/
 	u_quad_t ocount;	/* Output packet count on mif		*/
 	u_quad_t ibytes;	/* Input byte count on mif		*/
 	u_quad_t obytes;	/* Output byte count on mif		*/
 };
 
+/*
+ * Structure to export 'struct mif6' to userland via sysctl.
+ * Same fields as the kernel's 'struct mif6', except that m6_ifp holds
+ * an interface index here instead of a 'struct ifnet' pointer.
+ */
+struct mif6_sctl {
+	u_char		m6_flags;	/* MIFF_ flags defined above         */
+	u_int		m6_rate_limit;	/* max rate			     */
+	struct in6_addr	m6_lcl_addr;	/* local interface address           */
+	uint32_t	m6_ifp;		/* interface index	             */
+	u_quad_t	m6_pkt_in;	/* # pkts in on interface            */
+	u_quad_t	m6_pkt_out;	/* # pkts out on interface           */
+	u_quad_t	m6_bytes_in;	/* # bytes in on interface	     */
+	u_quad_t	m6_bytes_out;	/* # bytes out on interface	     */
+};
+
 #if defined(_KERNEL) || defined(KERNEL)
 /*
  * The kernel's multicast-interface structure.
  */
 struct mif6 {
         u_char		m6_flags;	/* MIFF_ flags defined above         */
 	u_int		m6_rate_limit;	/* max rate			     */
 	struct in6_addr	m6_lcl_addr;	/* local interface address           */
 	struct ifnet    *m6_ifp;	/* pointer to interface              */
 	u_quad_t	m6_pkt_in;	/* # pkts in on interface            */
 	u_quad_t	m6_pkt_out;	/* # pkts out on interface           */
 	u_quad_t	m6_bytes_in;	/* # bytes in on interface	     */
 	u_quad_t	m6_bytes_out;	/* # bytes out on interface	     */
 #ifdef notyet
 	u_int		m6_rsvp_on;	/* RSVP listening on this vif */
 	struct socket   *m6_rsvpd;	/* RSVP daemon socket */
 #endif
 };
 
 /*
  * The kernel's multicast forwarding cache entry structure
  */
 struct mf6c {
 	struct sockaddr_in6  mf6c_origin;	/* IPv6 origin of mcasts     */
 	struct sockaddr_in6  mf6c_mcastgrp;	/* multicast group associated*/
 	mifi_t		 mf6c_parent;		/* incoming IF               */
 	struct mrt6_ifset mf6c_ifset;		/* set of outgoing IFs */
 
 	u_quad_t	mf6c_pkt_cnt;		/* pkt count for src-grp     */
 	u_quad_t	mf6c_byte_cnt;		/* byte count for src-grp    */
 	u_quad_t	mf6c_wrong_if;		/* wrong if for src-grp	     */
 	int		mf6c_expire;		/* time to clean entry up    */
 	struct timeval  mf6c_last_assert;	/* last time I sent an assert*/
 	struct rtdetq  *mf6c_stall;		/* pkts waiting for route */
 	struct mf6c    *mf6c_next;		/* hash table linkage */
 };
 
 #define MF6C_INCOMPLETE_PARENT ((mifi_t)-1)
 
 /*
  * Argument structure used for pkt info. while upcall is made
  */
 #ifndef _NETINET_IP_MROUTE_H_
 struct rtdetq {		/* XXX: rtdetq is also defined in ip_mroute.h */
     struct mbuf		*m;		/* A copy of the packet		    */
     struct ifnet	*ifp;		/* Interface pkt came in on	    */
 #ifdef UPCALL_TIMING
     struct timeval	t;		/* Timestamp */
 #endif /* UPCALL_TIMING */
     struct rtdetq	*next;
 };
 #endif /* _NETINET_IP_MROUTE_H_ */
 
 #define MF6CTBLSIZ	256
 #if (MF6CTBLSIZ & (MF6CTBLSIZ - 1)) == 0	  /* from sys:route.h */
 #define MF6CHASHMOD(h)	((h) & (MF6CTBLSIZ - 1))
 #else
 #define MF6CHASHMOD(h)	((h) % MF6CTBLSIZ)
 #endif
 
 #define MAX_UPQ6	4		/* max. no of pkts in upcall Q */
 
 extern int	(*ip6_mrouter_set)(struct socket *so, struct sockopt *sopt);
 extern int	(*ip6_mrouter_get)(struct socket *so, struct sockopt *sopt);
 extern int	(*ip6_mrouter_done)(void);
 extern int	(*mrt6_ioctl)(u_long, caddr_t);
 #endif /* _KERNEL */
 
 #endif /* !_NETINET6_IP6_MROUTE_H_ */
Index: projects/ifnet/sys/netpfil/pf/pf_norm.c
===================================================================
--- projects/ifnet/sys/netpfil/pf/pf_norm.c	(revision 281172)
+++ projects/ifnet/sys/netpfil/pf/pf_norm.c	(revision 281173)
@@ -1,2293 +1,2294 @@
 /*-
  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
  * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	$OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_pf.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/vnet.h>
 #include <net/pfvar.h>
 #include <net/if_pflog.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif /* INET6 */
 
 /*
  * One received fragment: the mbuf holding it plus the offsets and
  * lengths needed to place it in its packet's reassembly queue.
  */
 struct pf_frent {
 	TAILQ_ENTRY(pf_frent)	fr_next;
 	struct mbuf	*fe_m;
 	uint16_t	fe_hdrlen;	/* ipv4 header length with ip options
 					   ipv6, extension, fragment header */
 	uint16_t	fe_extoff;	/* last extension header offset or 0 */
 	uint16_t	fe_len;		/* fragment length */
 	uint16_t	fe_off;		/* fragment offset */
 	uint16_t	fe_mff;		/* more fragment flag */
 };
 
 /*
  * Lookup key identifying the packet a fragment belongs to.
  * It is embedded as the first member of struct pf_fragment, and the
  * code casts between the two types (see pf_find_fragment() and
  * pf_fillup_fragment()), so it must stay at offset 0 there.
  */
 struct pf_fragment_cmp {
 	struct pf_addr	frc_src;
 	struct pf_addr	frc_dst;
 	uint32_t	frc_id;
 	sa_family_t	frc_af;
 	uint8_t		frc_proto;
 	uint8_t		frc_direction;
 };
 
 /*
  * Per-packet reassembly state: the lookup key, linkage into a red-black
  * tree and an LRU-style queue, and the list of fragments seen so far.
  * The fr_* macros below are shorthands for the members of fr_key.
  */
 struct pf_fragment {
 	struct pf_fragment_cmp	fr_key;	/* must be first, see casts */
 #define fr_src	fr_key.frc_src
 #define fr_dst	fr_key.frc_dst
 #define fr_id	fr_key.frc_id
 #define fr_af	fr_key.frc_af
 #define fr_proto	fr_key.frc_proto
 #define fr_direction	fr_key.frc_direction
 
 	RB_ENTRY(pf_fragment) fr_entry;
 	TAILQ_ENTRY(pf_fragment) frag_next;
 	uint8_t		fr_flags;	/* status flags */
 #define PFFRAG_SEENLAST		0x0001	/* Seen the last fragment for this */
 #define PFFRAG_NOBUFFER		0x0002	/* Non-buffering fragment cache */
 #define PFFRAG_DROP		0x0004	/* Drop all fragments */
 /* True when the entry buffers its fragments (PFFRAG_NOBUFFER clear). */
 #define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
 	uint16_t	fr_max;		/* fragment data max */
 	uint32_t	fr_timeout;	/* refreshed on lookup; compared with
 					   time_uptime when purging */
 	uint16_t	fr_maxlen;	/* maximum length of single fragment */
 	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
 };
 
 /*
  * Payload of the PF_REASSEMBLED mbuf tag that pf_reassemble6() attaches
  * to a reassembled IPv6 packet.
  */
 struct pf_fragment_tag {
 	uint16_t	ft_hdrlen;	/* header length of reassembled pkt */
 	uint16_t	ft_extoff;	/* last extension header offset or 0 */
 	uint16_t	ft_maxlen;	/* maximum fragment payload length */
 	uint32_t	ft_id;		/* fragment id */
 };
 
 /*
  * Mutex guarding the fragment queues and trees below.  Note that it is
  * a plain static, while the zones, queues and trees are VNET variables.
  */
 static struct mtx pf_frag_mtx;
 #define PF_FRAG_LOCK()		mtx_lock(&pf_frag_mtx)
 #define PF_FRAG_UNLOCK()	mtx_unlock(&pf_frag_mtx)
 #define PF_FRAG_ASSERT()	mtx_assert(&pf_frag_mtx, MA_OWNED)
 
 VNET_DEFINE(uma_zone_t, pf_state_scrub_z);	/* XXX: shared with pfsync */
 
 /* UMA zones for struct pf_frent and struct pf_fragment allocations. */
 static VNET_DEFINE(uma_zone_t, pf_frent_z);
 #define	V_pf_frent_z	VNET(pf_frent_z)
 static VNET_DEFINE(uma_zone_t, pf_frag_z);
 #define	V_pf_frag_z	VNET(pf_frag_z)
 
 /*
  * Buffering entries live on pf_fragqueue / pf_frag_tree, non-buffering
  * (cache) entries on pf_cachequeue / pf_cache_tree.  Both trees share
  * the pf_frag_tree type and the pf_frag_compare() ordering.
  */
 TAILQ_HEAD(pf_fragqueue, pf_fragment);
 TAILQ_HEAD(pf_cachequeue, pf_fragment);
 static VNET_DEFINE(struct pf_fragqueue,	pf_fragqueue);
 #define	V_pf_fragqueue			VNET(pf_fragqueue)
 static VNET_DEFINE(struct pf_cachequeue,	pf_cachequeue);
 #define	V_pf_cachequeue			VNET(pf_cachequeue)
 RB_HEAD(pf_frag_tree, pf_fragment);
 static VNET_DEFINE(struct pf_frag_tree,	pf_frag_tree);
 #define	V_pf_frag_tree			VNET(pf_frag_tree)
 static VNET_DEFINE(struct pf_frag_tree,	pf_cache_tree);
 #define	V_pf_cache_tree			VNET(pf_cache_tree)
 static int		 pf_frag_compare(struct pf_fragment *,
 			    struct pf_fragment *);
 static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 
 static void	pf_flush_fragments(void);
 static void	pf_free_fragment(struct pf_fragment *);
 static void	pf_remove_fragment(struct pf_fragment *);
 static int	pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
 		    struct tcphdr *, int, sa_family_t);
 static struct pf_frent *pf_create_fragment(u_short *);
 static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
 		    struct pf_frag_tree *tree);
 static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
 		    struct pf_frent *, u_short *);
 static int	pf_isfull_fragment(struct pf_fragment *);
 static struct mbuf *pf_join_fragment(struct pf_fragment *);
 #ifdef INET
 static void	pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t);
 static int	pf_reassemble(struct mbuf **, struct ip *, int, u_short *);
 static struct mbuf *pf_fragcache(struct mbuf **, struct ip*,
 		    struct pf_fragment **, int, int, int *);
 #endif	/* INET */
 #ifdef INET6
 static int	pf_reassemble6(struct mbuf **, struct ip6_hdr *,
 		    struct ip6_frag *, uint16_t, uint16_t, int, u_short *);
 static void	pf_scrub_ip6(struct mbuf **, uint8_t);
 #endif	/* INET6 */
 
 /*
  * Debug printf: active only when V_pf_status.debug >= PF_DEBUG_MISC.
  * Output is prefixed with the calling function's name.  The argument
  * must be a complete, parenthesized printf argument list, e.g.
  * DPFPRINTF(("value %d", v)).  No newline is appended.
  */
 #define	DPFPRINTF(x) do {				\
 	if (V_pf_status.debug >= PF_DEBUG_MISC) {	\
 		printf("%s: ", __func__);		\
 		printf x ;				\
 	}						\
 } while(0)
 
 #ifdef INET
 /*
  * Fill in the fragment lookup key for an IPv4 packet: addresses,
  * protocol and id are copied straight from the IP header; the address
  * family is fixed to AF_INET and the direction comes from the caller.
  */
 static void
 pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key)
 {
 
 	/* Caller-supplied and constant parts. */
 	key->frc_af = AF_INET;
 	key->frc_direction = dir;
 
 	/* Parts taken from the IP header. */
 	key->frc_id = ip->ip_id;
 	key->frc_proto = ip->ip_p;
 	key->frc_src.v4 = ip->ip_src;
 	key->frc_dst.v4 = ip->ip_dst;
 }
 #endif	/* INET */
 
 /*
  * Set up the normalization code: create the UMA zones for fragment
  * queues, fragment entries and state scrub data, cap the fragment entry
  * zone at PFFRAG_FRENT_HIWAT, and initialize the fragment mutex and the
  * two LRU queues.
  */
 void
 pf_normalize_init(void)
 {
 
 	V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_state_scrub_z = uma_zcreate("pf state scrubs",
 	    sizeof(struct pf_state_scrub),  NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 
 	/* Publish the fragment entry zone and its limit via pf_limits. */
 	V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z;
 	V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
 	uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT);
 	uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached");
 
 	mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF);
 
 	TAILQ_INIT(&V_pf_fragqueue);
 	TAILQ_INIT(&V_pf_cachequeue);
 }
 
 /*
  * Undo pf_normalize_init(): destroy the three UMA zones and the
  * fragment mutex.  This function does not walk the fragment queues
  * itself.
  */
 void
 pf_normalize_cleanup(void)
 {
 
 	uma_zdestroy(V_pf_state_scrub_z);
 	uma_zdestroy(V_pf_frent_z);
 	uma_zdestroy(V_pf_frag_z);
 
 	mtx_destroy(&pf_frag_mtx);
 }
 
 /*
  * Ordering function for the fragment red-black trees.
  *
  * Compares id, protocol, address family, source and destination, in
  * that order, and returns <0, 0 or >0.  The direction member of the
  * key is not part of the comparison.
  *
  * The id is a uint32_t, so it is compared explicitly instead of being
  * subtracted into an int: a subtraction can report "a < b" and "b < a"
  * at the same time for ids 2^31 apart, which would break the tree
  * ordering.
  */
 static int
 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
 {
 	int	diff;
 
 	if (a->fr_id != b->fr_id)
 		return (a->fr_id < b->fr_id ? -1 : 1);
 	/* The remaining scalar members are narrow; int subtraction is safe. */
 	if ((diff = a->fr_proto - b->fr_proto) != 0)
 		return (diff);
 	if ((diff = a->fr_af - b->fr_af) != 0)
 		return (diff);
 	if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
 		return (diff);
 	if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
 		return (diff);
 	return (0);
 }
 
 /*
  * Free every fragment entry whose timestamp is not newer than
  * "now - PFTM_FRAG timeout".
  *
  * Both queues are scanned from the tail.  Entries are put at the head
  * when they are created or looked up, so the tail is the least recently
  * touched entry and the scan stops at the first one that has not
  * expired.  Takes and releases the fragment lock itself.
  */
 void
 pf_purge_expired_fragments(void)
 {
 	struct pf_fragment	*frag;
 	u_int32_t		 expire = time_uptime -
 				    V_pf_default_rule.timeout[PFTM_FRAG];
 
 	PF_FRAG_LOCK();
 	/* Buffering (full reassembly) entries. */
 	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
 		KASSERT((BUFFER_FRAGMENTS(frag)),
 		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
 		if (frag->fr_timeout > expire)
 			break;
 
 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 		pf_free_fragment(frag);
 	}
 
 	/* Non-buffering (cache) entries. */
 	while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
 		KASSERT((!BUFFER_FRAGMENTS(frag)),
 		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
 		if (frag->fr_timeout > expire)
 			break;
 
 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 		pf_free_fragment(frag);
 		/* The freed entry must no longer be the queue's tail. */
 		KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
 		    TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
 		    ("!(TAILQ_EMPTY() || TAILQ_LAST() != frag): %s",
 		    __FUNCTION__));
 	}
 	PF_FRAG_UNLOCK();
 }
 
 /*
  * Try to flush old fragments to make space for new ones.
  *
  * The target is to bring the number of allocated fragment entries down
  * to 90% of the current count.  Each round frees the tail entry of the
  * reassembly queue and of the cache queue; the loop ends when the
  * target is reached or both queues are empty.  The fragment lock must
  * be held.
  */
 static void
 pf_flush_fragments(void)
 {
 	struct pf_fragment	*frag, *cache;
 	int			 cur, goal;
 
 	PF_FRAG_ASSERT();
 
 	cur = uma_zone_get_cur(V_pf_frent_z);
 	goal = cur * 9 / 10;
 	/* Report how many entries must go, not how many may stay. */
 	DPFPRINTF(("trying to free %d frag entries\n", cur - goal));
 	while (goal < uma_zone_get_cur(V_pf_frent_z)) {
 		frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
 		if (frag)
 			pf_free_fragment(frag);
 		cache = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
 		if (cache)
 			pf_free_fragment(cache);
 		if (frag == NULL && cache == NULL)
 			break;
 	}
 }
 
 /* Frees the fragments and all associated entries */
 static void
 pf_free_fragment(struct pf_fragment *frag)
 {
 	struct pf_frent		*frent;
 
 	PF_FRAG_ASSERT();
 
 	/* Free all fragments */
 	if (BUFFER_FRAGMENTS(frag)) {
 		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
 		    frent = TAILQ_FIRST(&frag->fr_queue)) {
 			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
 
 			m_freem(frent->fe_m);
 			uma_zfree(V_pf_frent_z, frent);
 		}
 	} else {
 		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
 		    frent = TAILQ_FIRST(&frag->fr_queue)) {
 			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
 
 			KASSERT((TAILQ_EMPTY(&frag->fr_queue) ||
 			    TAILQ_FIRST(&frag->fr_queue)->fe_off >
 			    frent->fe_len),
 			    ("! (TAILQ_EMPTY() || TAILQ_FIRST()->fe_off >"
 			    " frent->fe_len): %s", __func__));
 
 			uma_zfree(V_pf_frent_z, frent);
 		}
 	}
 
 	pf_remove_fragment(frag);
 }
 
 /*
  * Look up the entry matching 'key' in 'tree'.  A hit refreshes the
  * entry's timestamp and moves it to the head of its queue (reassembly
  * or cache, depending on its flags).  Returns NULL when nothing
  * matches.  The fragment lock must be held.
  */
 static struct pf_fragment *
 pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
 {
 	struct pf_fragment	*hit;
 
 	PF_FRAG_ASSERT();
 
 	/* The key is the first member of pf_fragment, hence the cast. */
 	hit = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
 	if (hit == NULL)
 		return (NULL);
 
 	/* XXX Are we sure we want to update the timeout? */
 	hit->fr_timeout = time_uptime;
 	if (!BUFFER_FRAGMENTS(hit)) {
 		TAILQ_REMOVE(&V_pf_cachequeue, hit, frag_next);
 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, hit, frag_next);
 	} else {
 		TAILQ_REMOVE(&V_pf_fragqueue, hit, frag_next);
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, hit, frag_next);
 	}
 
 	return (hit);
 }
 
 /*
  * Unlink a fragment entry from its tree and queue (reassembly or cache,
  * chosen by its flags) and return it to the zone.  Anything still on
  * the entry's own fr_queue is left untouched.  The fragment lock must
  * be held.
  */
 static void
 pf_remove_fragment(struct pf_fragment *frag)
 {
 
 	PF_FRAG_ASSERT();
 
 	if (!BUFFER_FRAGMENTS(frag)) {
 		RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
 		TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
 	} else {
 		RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
 		TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
 	}
 
 	/* Common tail of both cases. */
 	uma_zfree(V_pf_frag_z, frag);
 }
 
 /*
  * Allocate a fragment record without sleeping.  If the first attempt
  * fails, old fragments are flushed and the allocation is retried once.
  * On final failure *reason is set to PFRES_MEMORY and NULL is returned.
  * The fragment lock must be held.
  */
 static struct pf_frent *
 pf_create_fragment(u_short *reason)
 {
 	struct pf_frent *fe;
 
 	PF_FRAG_ASSERT();
 
 	fe = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 	if (fe != NULL)
 		return (fe);
 
 	/* Make room and try a second time. */
 	pf_flush_fragments();
 	fe = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 	if (fe != NULL)
 		return (fe);
 
 	REASON_SET(reason, PFRES_MEMORY);
 	return (NULL);
 }
 
 /*
  * Insert the fragment record 'frent' into the reassembly queue that
  * matches 'key', creating the queue if it does not exist yet.
  *
  * The fragment is validated first (non-empty, 8-byte aligned unless it
  * is the last one, within IP_MAXPACKET, consistent with what was seen
  * before).  Overlaps are resolved by trimming the front of the new
  * fragment against its predecessor and by trimming or dropping the
  * queued fragments that follow it.
  *
  * Returns the reassembly queue on success.  On failure the record
  * 'frent' is returned to its zone (the mbuf it points to is not freed
  * here), *reason is set, and NULL is returned.  The fragment lock must
  * be held.
  */
 static struct pf_fragment *
 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 		u_short *reason)
 {
 	struct pf_frent		*after, *next, *prev;
 	struct pf_fragment	*frag;
 	uint16_t		total;
 
 	PF_FRAG_ASSERT();
 
 	/* No empty fragments. */
 	if (frent->fe_len == 0) {
 		DPFPRINTF(("bad fragment: len 0"));
 		goto bad_fragment;
 	}
 
 	/* All fragments are 8 byte aligned. */
 	if (frent->fe_mff && (frent->fe_len & 0x7)) {
 		DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len));
 		goto bad_fragment;
 	}
 
 	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */
 	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
 		DPFPRINTF(("bad fragment: max packet %d",
 		    frent->fe_off + frent->fe_len));
 		goto bad_fragment;
 	}
 
 	DPFPRINTF((key->frc_af == AF_INET ?
 	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
 	    key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len));
 
 	/* Fully buffer all of the fragments in this fragment queue. */
 	frag = pf_find_fragment(key, &V_pf_frag_tree);
 
 	/* Create a new reassembly queue for this packet. */
 	if (frag == NULL) {
 		frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 		if (frag == NULL) {
 			pf_flush_fragments();
 			frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 			if (frag == NULL) {
 				REASON_SET(reason, PFRES_MEMORY);
 				goto drop_fragment;
 			}
 		}
 
 		*(struct pf_fragment_cmp *)frag = *key;
 		/*
 		 * The zone hands out uninitialized memory.  Clear the flags
 		 * so that BUFFER_FRAGMENTS() reliably identifies this as a
 		 * buffering entry when it is later looked up or removed.
 		 */
 		frag->fr_flags = 0;
 		/*
 		 * Use the same clock as pf_find_fragment() and
 		 * pf_purge_expired_fragments(); a wall-clock stamp would
 		 * never compare as expired against time_uptime.
 		 */
 		frag->fr_timeout = time_uptime;
 		frag->fr_maxlen = frent->fe_len;
 		TAILQ_INIT(&frag->fr_queue);
 
 		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag);
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
 
 		/* We do not have a previous fragment. */
 		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
 
 		return (frag);
 	}
 
 	KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
 
 	/* Remember maximum fragment len for refragmentation. */
 	if (frent->fe_len > frag->fr_maxlen)
 		frag->fr_maxlen = frent->fe_len;
 
 	/* Maximum data we have seen already. */
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 
 	/* Non terminal fragments must have more fragments flag. */
 	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
 		goto bad_fragment;
 
 	/* Check if we saw the last fragment already. */
 	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
 		if (frent->fe_off + frent->fe_len > total ||
 		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
 			goto bad_fragment;
 	} else {
 		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
 			goto bad_fragment;
 	}
 
 	/* Find a fragment after the current one. */
 	prev = NULL;
 	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
 		if (after->fe_off > frent->fe_off)
 			break;
 		prev = after;
 	}
 
 	KASSERT(prev != NULL || after != NULL,
 	    ("prev != NULL || after != NULL"));
 
 	/* Trim the front of the new fragment where it overlaps 'prev'. */
 	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
 		uint16_t precut;
 
 		precut = prev->fe_off + prev->fe_len - frent->fe_off;
 		if (precut >= frent->fe_len)
 			goto bad_fragment;
 		DPFPRINTF(("overlap -%d", precut));
 		m_adj(frent->fe_m, precut);
 		frent->fe_off += precut;
 		frent->fe_len -= precut;
 	}
 
 	/* Trim or drop queued fragments that the new one overlaps. */
 	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
 	    after = next) {
 		uint16_t aftercut;
 
 		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
 		DPFPRINTF(("adjust overlap %d", aftercut));
 		if (aftercut < after->fe_len) {
 			m_adj(after->fe_m, aftercut);
 			after->fe_off += aftercut;
 			after->fe_len -= aftercut;
 			break;
 		}
 
 		/* This fragment is completely overlapped, lose it. */
 		next = TAILQ_NEXT(after, fr_next);
 		m_freem(after->fe_m);
 		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
 		uma_zfree(V_pf_frent_z, after);
 	}
 
 	if (prev == NULL)
 		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
 	else
 		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
 
 	return (frag);
 
 bad_fragment:
 	REASON_SET(reason, PFRES_FRAG);
 drop_fragment:
 	uma_zfree(V_pf_frent_z, frent);
 	return (NULL);
 }
 
 static int
 pf_isfull_fragment(struct pf_fragment *frag)
 {
 	struct pf_frent	*frent, *next;
 	uint16_t off, total;
 
 	/* Check if we are completely reassembled */
 	if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
 		return (0);
 
 	/* Maximum data we have seen already */
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 
 	/* Check if we have all the data */
 	off = 0;
 	for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
 		next = TAILQ_NEXT(frent, fr_next);
 
 		off += frent->fe_len;
 		if (off < total && (next == NULL || next->fe_off != off)) {
 			DPFPRINTF(("missing fragment at %d, next %d, total %d",
 			    off, next == NULL ? -1 : next->fe_off, total));
 			return (0);
 		}
 	}
 	DPFPRINTF(("%d < %d?", off, total));
 	if (off < total)
 		return (0);
 	KASSERT(off == total, ("off == total"));
 
 	return (1);
 }
 
 static struct mbuf *
 pf_join_fragment(struct pf_fragment *frag)
 {
 	struct mbuf *m, *m2;
 	struct pf_frent	*frent, *next;
 
 	frent = TAILQ_FIRST(&frag->fr_queue);
 	next = TAILQ_NEXT(frent, fr_next);
 
 	m = frent->fe_m;
 	m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
 	uma_zfree(V_pf_frent_z, frent);
 	for (frent = next; frent != NULL; frent = next) {
 		next = TAILQ_NEXT(frent, fr_next);
 
 		m2 = frent->fe_m;
 		/* Strip off ip header. */
 		m_adj(m2, frent->fe_hdrlen);
 		/* Strip off any trailing bytes. */
 		m_adj(m2, frent->fe_len - m2->m_pkthdr.len);
 
 		uma_zfree(V_pf_frent_z, frent);
 		m_cat(m, m2);
 	}
 
 	/* Remove from fragment queue. */
 	pf_remove_fragment(frag);
 
 	return (m);
 }
 
 #ifdef INET
 /*
  * Queue an IPv4 fragment for reassembly and, once the packet is
  * complete, hand the reassembled packet back through *m0.
  *
  * Returns PF_DROP when the fragment cannot be queued (*m0 is left
  * untouched in that case) or when the reassembled packet is too large.
  * Returns PF_PASS with *m0 == NULL while the packet is still
  * incomplete, and PF_PASS with *m0 pointing to the reassembled packet
  * when it is done.
  *
  * This function does not take the fragment lock itself; the helpers it
  * calls assert that the lock is held, so the caller has to hold it.
  */
 static int
 pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason)
 {
 	struct mbuf		*m = *m0;
 	struct pf_frent		*frent;
 	struct pf_fragment	*frag;
 	struct pf_fragment_cmp	key;
 	uint16_t		total, hdrlen;
 
 	/* Get an entry for the fragment queue */
 	if ((frent = pf_create_fragment(reason)) == NULL)
 		return (PF_DROP);
 
 	/* Describe the fragment: header length, payload length, offset. */
 	frent->fe_m = m;
 	frent->fe_hdrlen = ip->ip_hl << 2;
 	frent->fe_extoff = 0;
 	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
 	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
 	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
 
 	pf_ip2key(ip, dir, &key);
 
 	/* On failure frent has already been released by the callee. */
 	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
 		return (PF_DROP);
 
 	/* The mbuf is part of the fragment entry, no direct free or access */
 	m = *m0 = NULL;
 
 	if (!pf_isfull_fragment(frag))
 		return (PF_PASS);  /* drop because *m0 is NULL, no error */
 
 	/* We have all the data */
 	frent = TAILQ_FIRST(&frag->fr_queue);
 	KASSERT(frent != NULL, ("frent != NULL"));
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 	hdrlen = frent->fe_hdrlen;
 
 	/* pf_join_fragment() frees the queue; frag is invalid afterwards. */
 	m = *m0 = pf_join_fragment(frag);
 	frag = NULL;
 
 	/* Recompute the packet header length from the joined chain. */
 	if (m->m_flags & M_PKTHDR) {
 		int plen = 0;
 		for (m = *m0; m; m = m->m_next)
 			plen += m->m_len;
 		m = *m0;
 		m->m_pkthdr.len = plen;
 	}
 
 	ip = mtod(m, struct ip *);
 	ip->ip_len = htons(hdrlen + total);
 	/*
 	 * NOTE(review): ip_off is read with ntohs() above, but the mask
 	 * here is applied without htons() -- confirm this is intended.
 	 */
 	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
 
 	if (hdrlen + total > IP_MAXPACKET) {
 		DPFPRINTF(("drop: too big: %d", total));
 		ip->ip_len = 0;
 		REASON_SET(reason, PFRES_SHORT);
 		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
 		return (PF_DROP);
 	}
 
 	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
 	return (PF_PASS);
 }
 #endif	/* INET */
 
 #ifdef INET6
 /*
  * Queue an IPv6 fragment for reassembly and, once the packet is
  * complete, hand the reassembled packet back through *m0.
  *
  * 'hdrlen' is the length of the headers up to and including the
  * fragment header, 'extoff' the offset of the last extension header
  * before it (0 if there is none).
  *
  * Returns PF_DROP when the fragment cannot be queued, when memory runs
  * out while finishing the packet, or when the result is too large.
  * Returns PF_PASS with *m0 == NULL while the packet is incomplete, and
  * PF_PASS with the reassembled packet in *m0 when done.  A
  * PF_REASSEMBLED tag carrying a struct pf_fragment_tag is attached to
  * the reassembled packet.  Takes and releases the fragment lock itself.
  */
 static int
 pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
     uint16_t hdrlen, uint16_t extoff, int dir, u_short *reason)
 {
 	struct mbuf		*m = *m0;
 	struct pf_frent		*frent;
 	struct pf_fragment	*frag;
 	struct pf_fragment_cmp	 key;
 	struct m_tag		*mtag;
 	struct pf_fragment_tag	*ftag;
 	int			 off;
 	uint32_t		 frag_id;
 	uint16_t		 total, maxlen;
 	uint8_t			 proto;
 
 	PF_FRAG_LOCK();
 
 	/* Get an entry for the fragment queue. */
 	if ((frent = pf_create_fragment(reason)) == NULL) {
 		PF_FRAG_UNLOCK();
 		return (PF_DROP);
 	}
 
 	frent->fe_m = m;
 	frent->fe_hdrlen = hdrlen;
 	frent->fe_extoff = extoff;
 	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
 	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
 	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
 
 	key.frc_src.v6 = ip6->ip6_src;
 	key.frc_dst.v6 = ip6->ip6_dst;
 	key.frc_af = AF_INET6;
 	/* Only the first fragment's protocol is relevant. */
 	key.frc_proto = 0;
 	key.frc_id = fraghdr->ip6f_ident;
 	key.frc_direction = dir;
 
 	/* On failure frent has already been released by the callee. */
 	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
 		PF_FRAG_UNLOCK();
 		return (PF_DROP);
 	}
 
 	/* The mbuf is part of the fragment entry, no direct free or access. */
 	m = *m0 = NULL;
 
 	if (!pf_isfull_fragment(frag)) {
 		PF_FRAG_UNLOCK();
 		return (PF_PASS);  /* Drop because *m0 is NULL, no error. */
 	}
 
 	/*
 	 * We have all the data.  The headers of the reassembled packet are
 	 * those of the first queued fragment, so both the header length and
 	 * the extension header offset must come from that fragment, not
 	 * from the one that happened to arrive last.
 	 */
 	frent = TAILQ_FIRST(&frag->fr_queue);
 	KASSERT(frent != NULL, ("frent != NULL"));
 	extoff = frent->fe_extoff;
 	maxlen = frag->fr_maxlen;
 	frag_id = frag->fr_id;
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
 
 	/* pf_join_fragment() frees the queue; frag is invalid afterwards. */
 	m = *m0 = pf_join_fragment(frag);
 	frag = NULL;
 
 	PF_FRAG_UNLOCK();
 
 	/* Take protocol from first fragment header. */
 	m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off);
 	KASSERT(m, ("%s: short mbuf chain", __func__));
 	proto = *(mtod(m, caddr_t) + off);
 	m = *m0;
 
 	/* Delete frag6 header */
 	if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0)
 		goto fail;
 
 	/* Recompute the packet header length from the joined chain. */
 	if (m->m_flags & M_PKTHDR) {
 		int plen = 0;
 		for (m = *m0; m; m = m->m_next)
 			plen += m->m_len;
 		m = *m0;
 		m->m_pkthdr.len = plen;
 	}
 
 	/* Record the reassembly parameters in an mbuf tag. */
 	if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag),
 	    M_NOWAIT)) == NULL)
 		goto fail;
 	ftag = (struct pf_fragment_tag *)(mtag + 1);
 	ftag->ft_hdrlen = hdrlen;
 	ftag->ft_extoff = extoff;
 	ftag->ft_maxlen = maxlen;
 	ftag->ft_id = frag_id;
 	m_tag_prepend(m, mtag);
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
 	if (extoff) {
 		/* Write protocol into next field of last extension header. */
 		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
 		    &off);
 		KASSERT(m, ("%s: short mbuf chain", __func__));
 		*(mtod(m, char *) + off) = proto;
 		m = *m0;
 	} else
 		ip6->ip6_nxt = proto;
 
 	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
 		DPFPRINTF(("drop: too big: %d", total));
 		ip6->ip6_plen = 0;
 		REASON_SET(reason, PFRES_SHORT);
 		/* PF_DROP requires a valid mbuf *m0 in pf_test6(). */
 		return (PF_DROP);
 	}
 
 	DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen)));
 	return (PF_PASS);
 
 fail:
 	REASON_SET(reason, PFRES_MEMORY);
 	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */
 	return (PF_DROP);
 }
 #endif	/* INET6 */
 
 #ifdef INET
 /*
  * Non-buffering fragment cache.  Tracks which byte ranges of an IPv4
  * datagram have already been seen as a queue of pf_frent entries hanging
  * off *frag, and trims (or, in drop mode, rejects) the parts of the current
  * fragment that overlap ranges already recorded.  In these entries fe_off
  * is the start offset and fe_len holds the END offset of the range.
  *
  * m0    - in/out packet; replaced when the leading overlap is trimmed.
  * h     - IPv4 header of the fragment.
  * frag  - in/out cache entry; allocated here when *frag is NULL and set
  *         back to NULL when the entry is freed as complete.
  * mff   - non-zero while more fragments follow; zero marks the last one.
  * drop  - non-zero: do not trim overlaps, drop the fragment and flag the
  *         whole entry PFFRAG_DROP instead.
  * nomem - set to 1 when an allocation fails.
  *
  * Returns the mbuf to pass on, or NULL after the mbuf has been freed.
  * Asserts that the fragment lock is held (PF_FRAG_ASSERT).
  */
 static struct mbuf *
 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
     int drop, int *nomem)
 {
 	struct mbuf		*m = *m0;
 	struct pf_frent		*frp, *fra, *cur = NULL;
 	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
 	u_int16_t		 off = ntohs(h->ip_off) << 3;
 	u_int16_t		 max = ip_len + off;
 	int			 hosed = 0;
 
 	PF_FRAG_ASSERT();
 	KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
 	    ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
 
 	/* Create a new range queue for this packet */
 	if (*frag == NULL) {
 		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 		if (*frag == NULL) {
 			/* Retry once after flushing old fragments. */
 			pf_flush_fragments();
 			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 			if (*frag == NULL)
 				goto no_mem;
 		}
 
 		/* Get an entry for the queue */
 		cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 		if (cur == NULL) {
 			uma_zfree(V_pf_frag_z, *frag);
 			*frag = NULL;
 			goto no_mem;
 		}
 
 		(*frag)->fr_flags = PFFRAG_NOBUFFER;
 		(*frag)->fr_max = 0;
 		(*frag)->fr_src.v4 = h->ip_src;
 		(*frag)->fr_dst.v4 = h->ip_dst;
 		(*frag)->fr_id = h->ip_id;
 		(*frag)->fr_timeout = time_uptime;
 
 		cur->fe_off = off;
 		cur->fe_len = max; /* TODO: fe_len = max - off ? */
 		TAILQ_INIT(&(*frag)->fr_queue);
 		TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
 
 		RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
 
 		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
 
 		goto pass;
 	}
 
 	/*
 	 * Find a fragment after the current one:
 	 *  - off contains the real shifted offset.
 	 * On exit frp is the last range starting at or before off (or NULL)
 	 * and fra is the first range starting after off (or NULL).
 	 */
 	frp = NULL;
 	TAILQ_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
 		if (fra->fe_off > off)
 			break;
 		frp = fra;
 	}
 
 	KASSERT((frp != NULL || fra != NULL),
 	    ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
 
 	if (frp != NULL) {
 		int	precut;
 
 		/* Leading bytes of this fragment already covered by frp. */
 		precut = frp->fe_len - off;
 		if (precut >= ip_len) {
 			/* Fragment is entirely a duplicate */
 			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
 			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
 			goto drop_fragment;
 		}
 		if (precut == 0) {
 			/* They are adjacent.  Fixup cache entry */
 			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
 			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
 			frp->fe_len = max;
 		} else if (precut > 0) {
 			/* The first part of this payload overlaps with a
 			 * fragment that has already been passed.
 			 * Need to trim off the first part of the payload.
 			 * But to do so easily, we need to create another
 			 * mbuf to throw the original header into.
 			 */
 
 			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
 			    h->ip_id, precut, frp->fe_off, frp->fe_len, off,
 			    max));
 
 			off += precut;
 			max -= precut;
 			/* Update the previous frag to encompass this one */
 			frp->fe_len = max;
 
 			if (!drop) {
 				/* XXX Optimization opportunity
 				 * This is a very heavy way to trim the payload.
 				 * we could do it much faster by diddling mbuf
 				 * internals but that would be even less legible
 				 * than this mbuf magic.  For my next trick,
 				 * I'll pull a rabbit out of my laptop.
 				 */
 				*m0 = m_dup(m, M_NOWAIT);
 				if (*m0 == NULL)
 					goto no_mem;
 				/* From KAME Project : We have missed this! */
 				/* Negative length: trim the copy's tail so only the IP header remains. */
 				m_adj(*m0, (h->ip_hl << 2) -
 				    (*m0)->m_pkthdr.len);
 
 				KASSERT(((*m0)->m_next == NULL),
 				    ("(*m0)->m_next != NULL: %s",
 				    __FUNCTION__));
 				/* Strip header plus overlap from the original, then append it. */
 				m_adj(m, precut + (h->ip_hl << 2));
 				m_cat(*m0, m);
 				m = *m0;
 				/* Recompute the packet header length from the chain. */
 				if (m->m_flags & M_PKTHDR) {
 					int plen = 0;
 					struct mbuf *t;
 					for (t = m; t; t = t->m_next)
 						plen += t->m_len;
 					m->m_pkthdr.len = plen;
 				}
 
 
 				h = mtod(m, struct ip *);
 
 				KASSERT(((int)m->m_len ==
 				    ntohs(h->ip_len) - precut),
 				    ("m->m_len != ntohs(h->ip_len) - precut: %s",
 				    __FUNCTION__));
 				h->ip_off = htons(ntohs(h->ip_off) +
 				    (precut >> 3));
 				h->ip_len = htons(ntohs(h->ip_len) - precut);
 			} else {
 				/* Drop mode: remember the overlap, drop late. */
 				hosed++;
 			}
 		} else {
 			/* There is a gap between fragments */
 
 			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
 			    h->ip_id, -precut, frp->fe_off, frp->fe_len, off,
 			    max));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
 			cur->fe_off = off;
 			cur->fe_len = max;
 			TAILQ_INSERT_AFTER(&(*frag)->fr_queue, frp, cur, fr_next);
 		}
 	}
 
 	if (fra != NULL) {
 		int	aftercut;
 		int	merge = 0;
 
 		/* Trailing bytes of this fragment already covered by fra. */
 		aftercut = max - fra->fe_off;
 		if (aftercut == 0) {
 			/* Adjacent fragments */
 			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
 			    h->ip_id, off, max, fra->fe_off, fra->fe_len));
 			fra->fe_off = off;
 			merge = 1;
 		} else if (aftercut > 0) {
 			/* Need to chop off the tail of this fragment */
 			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
 			    h->ip_id, aftercut, off, max, fra->fe_off,
 			    fra->fe_len));
 			fra->fe_off = off;
 			max -= aftercut;
 
 			merge = 1;
 
 			if (!drop) {
 				/* Negative length trims from the end of the chain. */
 				m_adj(m, -aftercut);
 				if (m->m_flags & M_PKTHDR) {
 					int plen = 0;
 					struct mbuf *t;
 					for (t = m; t; t = t->m_next)
 						plen += t->m_len;
 					m->m_pkthdr.len = plen;
 				}
 				h = mtod(m, struct ip *);
 				KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
 				    ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
 				    __FUNCTION__));
 				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
 			} else {
 				/* Drop mode: remember the overlap, drop late. */
 				hosed++;
 			}
 		} else if (frp == NULL) {
 			/* There is a gap between fragments */
 			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
 			    h->ip_id, -aftercut, off, max, fra->fe_off,
 			    fra->fe_len));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
 			cur->fe_off = off;
 			cur->fe_len = max;
 			TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
 		}
 
 
 		/* Need to glue together two separate fragment descriptors */
 		if (merge) {
 			if (cur && fra->fe_off <= cur->fe_len) {
 				/* Need to merge in a previous 'cur' */
 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
 				    "%d-%d) %d-%d (%d-%d)\n",
 				    h->ip_id, cur->fe_off, cur->fe_len, off,
 				    max, fra->fe_off, fra->fe_len));
 				fra->fe_off = cur->fe_off;
 				TAILQ_REMOVE(&(*frag)->fr_queue, cur, fr_next);
 				uma_zfree(V_pf_frent_z, cur);
 				cur = NULL;
 
 			} else if (frp && fra->fe_off <= frp->fe_len) {
 				/* Need to merge in a modified 'frp' */
 				KASSERT((cur == NULL), ("cur != NULL: %s",
 				    __FUNCTION__));
 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
 				    "%d-%d) %d-%d (%d-%d)\n",
 				    h->ip_id, frp->fe_off, frp->fe_len, off,
 				    max, fra->fe_off, fra->fe_len));
 				fra->fe_off = frp->fe_off;
 				TAILQ_REMOVE(&(*frag)->fr_queue, frp, fr_next);
 				uma_zfree(V_pf_frent_z, frp);
 				frp = NULL;
 
 			}
 		}
 	}
 
 	if (hosed) {
 		/*
 		 * We must keep tracking the overall fragment even when
 		 * we're going to drop it anyway so that we know when to
 		 * free the overall descriptor.  Thus we drop the frag late.
 		 */
 		goto drop_fragment;
 	}
 
 
  pass:
 	/* Update maximum data size */
 	if ((*frag)->fr_max < max)
 		(*frag)->fr_max = max;
 
 	/* This is the last segment */
 	if (!mff)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	/* Check if we are completely reassembled */
 	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
 	    TAILQ_FIRST(&(*frag)->fr_queue)->fe_off == 0 &&
 	    TAILQ_FIRST(&(*frag)->fr_queue)->fe_len == (*frag)->fr_max) {
 		/* Remove from fragment queue */
 		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
 		    (*frag)->fr_max));
 		pf_free_fragment(*frag);
 		*frag = NULL;
 	}
 
 	return (m);
 
  no_mem:
 	/* Allocation failure: report it through *nomem and free the mbuf. */
 	*nomem = 1;
 
 	/* Still need to pay attention to !IP_MF */
 	if (!mff && *frag != NULL)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	m_freem(m);
 	return (NULL);
 
  drop_fragment:
 
 	/* Still need to pay attention to !IP_MF */
 	if (!mff && *frag != NULL)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	if (drop) {
 		/* This fragment has been deemed bad.  Don't reass */
 		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
 			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
 			    h->ip_id));
 		(*frag)->fr_flags |= PFFRAG_DROP;
 	}
 
 	m_freem(m);
 	return (NULL);
 }
 #endif	/* INET */
 
 #ifdef INET6
 /*
  * Re-fragment the packet in *m0 using the values stored in the
  * pf_fragment_tag that follows mtag (header length, extension header
  * offset, maximum fragment length and fragment id), then hand every
  * resulting fragment to ip6_forward().
  *
  * The tag is deleted from the mbuf before fragmenting.  Returns PF_PASS
  * with *m0 freed and set to NULL when ip6_fragment() succeeded; otherwise
  * returns PF_DROP, leaves *m0 for the caller and frees any fragments that
  * were produced.
  */
 int
 pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag)
 {
 	struct mbuf		*m = *m0, *t;
 	struct pf_fragment_tag	*ftag = (struct pf_fragment_tag *)(mtag + 1);
 	struct pf_pdesc		 pd;
 	uint32_t		 frag_id;
 	uint16_t		 hdrlen, extoff, maxlen;
 	uint8_t			 proto;
 	int			 error, action;
 
 	/* Copy the tag contents out before the tag is deleted. */
 	hdrlen = ftag->ft_hdrlen;
 	extoff = ftag->ft_extoff;
 	maxlen = ftag->ft_maxlen;
 	frag_id = ftag->ft_id;
 	m_tag_delete(m, mtag);
 	mtag = NULL;
 	ftag = NULL;
 
 	/*
 	 * Remember the current next-header value in proto and replace it
 	 * with IPPROTO_FRAGMENT, either in the last extension header or in
 	 * the IPv6 header itself.
 	 */
 	if (extoff) {
 		int off;
 
 		/* Use protocol from next field of last extension header */
 		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
 		    &off);
 		KASSERT((m != NULL), ("pf_refragment6: short mbuf chain"));
 		proto = *(mtod(m, caddr_t) + off);
 		*(mtod(m, char *) + off) = IPPROTO_FRAGMENT;
 		m = *m0;
 	} else {
 		struct ip6_hdr *hdr;
 
 		hdr = mtod(m, struct ip6_hdr *);
 		proto = hdr->ip6_nxt;
 		hdr->ip6_nxt = IPPROTO_FRAGMENT;
 	}
 
 	/*
 	 * Maxlen may be less than 8 if there was only a single
 	 * fragment.  As it was fragmented before, add a fragment
 	 * header also for a single fragment.  If total or maxlen
 	 * is less than 8, ip6_fragment() will return EMSGSIZE and
 	 * we drop the packet.
 	 */
 	error = ip6_fragment(ifp, m, hdrlen, proto, maxlen, frag_id);
 	/* Detach the fragment list from the original packet. */
 	m = (*m0)->m_nextpkt;
 	(*m0)->m_nextpkt = NULL;
 	if (error == 0) {
 		/* The first mbuf contains the unfragmented packet. */
 		m_freem(*m0);
 		*m0 = NULL;
 		action = PF_PASS;
 	} else {
 		/* Drop expects an mbuf to free. */
 		DPFPRINTF(("refragment error %d", error));
 		action = PF_DROP;
 	}
 	/* Walk the m_nextpkt list, unlinking each fragment before use. */
 	for (t = m; m; m = t) {
 		t = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 		/* NOTE(review): presumably keeps the fragment from being filtered again -- confirm. */
+		m->m_flags |= M_SKIP_FIREWALL;
 		/* NOTE(review): pd is filled in here but never read in this function. */
 		memset(&pd, 0, sizeof(pd));
 		pd.pf_mtag = pf_find_mtag(m);
 		if (error == 0)
 			ip6_forward(m, 0);
 		else
 			m_freem(m);
 	}
 
 	return (action);
 }
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Apply the first matching scrub rule to an IPv4 packet: sanity-check the
  * header lengths, optionally clear IP_DF (no-df), and for fragments either
  * fully reassemble them with pf_reassemble() or, when the rule has
  * PFRULE_FRAGCROP or PFRULE_FRAGDROP set, run them through the
  * non-buffering fragment cache (pf_fragcache()).
  *
  * m0     - in/out packet; may be replaced by a reassembled or trimmed
  *          chain, or set to NULL when the fragment was queued or freed.
  * dir    - PF_IN or PF_OUT.
  * kif    - interface the packet is matched against.
  * reason - set on the no_mem, drop and bad paths.
  * pd     - packet descriptor; pf_mtag and flags may be updated.
  *
  * Returns PF_PASS when no scrub rule matches, the matching rule is
  * PF_NOSCRUB, or the packet may continue; PF_DROP otherwise.
  */
 int
 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
     struct pf_pdesc *pd)
 {
 	struct mbuf		*m = *m0;
 	struct pf_rule		*r;
 	struct pf_fragment	*frag = NULL;
 	struct pf_fragment_cmp	key;
 	struct ip		*h = mtod(m, struct ip *);
 	int			 mff = (ntohs(h->ip_off) & IP_MF);
 	int			 hlen = h->ip_hl << 2;
 	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
 	u_int16_t		 max;
 	int			 ip_len;
 	int			 tag = -1;
 	int			 verdict;
 
 	PF_RULES_RASSERT();
 
 	/* Find the first scrub rule matching this packet, using skip steps. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != AF_INET)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != h->ip_p)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr,
 		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr,
 		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
 			r = TAILQ_NEXT(r, entries);
 		else
 			break;
 	}
 
 	if (r == NULL || r->action == PF_NOSCRUB)
 		return (PF_PASS);
 	else {
 		r->packets[dir == PF_OUT]++;
 		r->bytes[dir == PF_OUT] += pd->tot_len;
 	}
 
 	/* Check for illegal packets */
 	if (hlen < (int)sizeof(struct ip))
 		goto drop;
 
 	if (hlen > ntohs(h->ip_len))
 		goto drop;
 
 	/* Clear IP_DF if the rule uses the no-df option */
 	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(~IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* We will need other tests here */
 	if (!fragoff && !mff)
 		goto no_fragment;
 
 	/* We're dealing with a fragment now. Don't allow fragments
 	 * with IP_DF to enter the cache. If the flag was cleared by
 	 * no-df above, fine. Otherwise drop it.
 	 */
 	if (h->ip_off & htons(IP_DF)) {
 		DPFPRINTF(("IP_DF\n"));
 		goto bad;
 	}
 
 	ip_len = ntohs(h->ip_len) - hlen;
 
 	/* All fragments are 8 byte aligned */
 	if (mff && (ip_len & 0x7)) {
 		DPFPRINTF(("mff and %d\n", ip_len));
 		goto bad;
 	}
 
 	/* Respect maximum length */
 	if (fragoff + ip_len > IP_MAXPACKET) {
 		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
 		goto bad;
 	}
 	max = fragoff + ip_len;
 
 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
 
 		/* Fully buffer all of the fragments */
 		PF_FRAG_LOCK();
 
 		pf_ip2key(h, dir, &key);
 		frag = pf_find_fragment(&key, &V_pf_frag_tree);
 
 		/*
 		 * Check if we saw the last fragment already.  frag is
 		 * non-NULL here, so the bad path also drops the lock.
 		 */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 		    max > frag->fr_max)
 			goto bad;
 
 		/* Might return a completely reassembled mbuf, or NULL */
 		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
 		verdict = pf_reassemble(m0, h, dir, reason);
 		PF_FRAG_UNLOCK();
 
 		if (verdict != PF_PASS)
 			return (PF_DROP);
 
 		m = *m0;
 		if (m == NULL)
 			return (PF_DROP);
 
 		/*
 		 * NOTE(review): frag was looked up before pf_reassemble()
 		 * ran and is read here after the lock was dropped; confirm
 		 * it cannot have been freed in between.
 		 */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 			goto drop;
 
 		h = mtod(m, struct ip *);
 	} else {
 		/* non-buffering fragment cache (drops or masks overlaps) */
 		int	nomem = 0;
 
 		if (dir == PF_OUT && pd->pf_mtag &&
 		    pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
 			/*
 			 * Already passed the fragment cache in the
 			 * input direction.  If we continued, it would
 			 * appear to be a dup and would be dropped.
 			 */
 			goto fragment_pass;
 		}
 
 		PF_FRAG_LOCK();
 		pf_ip2key(h, dir, &key);
 		frag = pf_find_fragment(&key, &V_pf_cache_tree);
 
 		/* Check if we saw the last fragment already */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 		    max > frag->fr_max) {
 			if (r->rule_flag & PFRULE_FRAGDROP)
 				frag->fr_flags |= PFFRAG_DROP;
 			goto bad;
 		}
 
 		/* On failure pf_fragcache() has freed the mbuf and m is NULL. */
 		*m0 = m = pf_fragcache(m0, h, &frag, mff,
 		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
 		PF_FRAG_UNLOCK();
 		if (m == NULL) {
 			if (nomem)
 				goto no_mem;
 			goto drop;
 		}
 
 		if (dir == PF_IN) {
 			/* Use mtag from copied and trimmed mbuf chain. */
 			pd->pf_mtag = pf_get_mtag(m);
 			if (pd->pf_mtag == NULL) {
 				m_freem(m);
 				/*
 				 * Clear m as well as *m0: the no_mem path
 				 * hands m to PFLOG_PACKET(), which must not
 				 * be given the chain that was just freed.
 				 */
 				*m0 = m = NULL;
 				goto no_mem;
 			}
 			pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
 		}
 
 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 			goto drop;
 		goto fragment_pass;
 	}
 
  no_fragment:
 	/* At this point, only IP_DF is allowed in ip_off */
 	if (h->ip_off & ~htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* not missing a return here */
 
  fragment_pass:
 	pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
 
 	/* Flag the descriptor only when full reassembly was in effect. */
 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
 		pd->flags |= PFDESC_IP_REAS;
 	return (PF_PASS);
 
  no_mem:
 	/* m may be NULL here when the mbuf has already been freed. */
 	REASON_SET(reason, PFRES_MEMORY);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  drop:
 	/* m may be NULL here when pf_fragcache() dropped the fragment. */
 	REASON_SET(reason, PFRES_NORM);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  bad:
 	DPFPRINTF(("dropping bad fragment\n"));
 
 	/*
 	 * Free associated fragments.  frag is only non-NULL when we got
 	 * here with the fragment lock held, so release it as well.
 	 */
 	if (frag != NULL) {
 		pf_free_fragment(frag);
 		PF_FRAG_UNLOCK();
 	}
 
 	REASON_SET(reason, PFRES_FRAG);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 
 	return (PF_DROP);
 }
 #endif
 
 #ifdef INET6
 /*
  * Apply the first matching scrub rule to an IPv6 packet.  Walks the
  * extension header chain, validates hop-by-hop options (including the
  * jumbo payload option) and the payload length, and hands the packet to
  * pf_reassemble6() as soon as a fragment header is found.
  *
  * m0     - in/out packet; after pf_reassemble6() it may be NULL or a
  *          completely reassembled chain.
  * dir    - PF_IN or PF_OUT.
  * kif    - interface the packet is matched against.
  * reason - set on the shortpkt and drop paths (and by pf_reassemble6()).
  * pd     - packet descriptor; PFDESC_IP_REAS is set after reassembly.
  *
  * Returns PF_PASS when no scrub rule matches, the matching rule is
  * PF_NOSCRUB, or the packet passed the checks; PF_DROP otherwise.
  */
 int
 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
     u_short *reason, struct pf_pdesc *pd)
 {
 	struct mbuf		*m = *m0;
 	struct pf_rule		*r;
 	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
 	int			 extoff;
 	int			 off;
 	struct ip6_ext		 ext;
 	struct ip6_opt		 opt;
 	struct ip6_opt_jumbo	 jumbo;
 	struct ip6_frag		 frag;
 	u_int32_t		 jumbolen = 0, plen;
 	int			 optend;
 	int			 ooff;
 	u_int8_t		 proto;
 	int			 terminal;
 
 	PF_RULES_RASSERT();
 
 	/* Find the first scrub rule matching this packet, using skip steps. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != AF_INET6)
 			r = r->skip[PF_SKIP_AF].ptr;
 #if 0 /* header chain! */
 		else if (r->proto && r->proto != h->ip6_nxt)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 #endif
 		else if (PF_MISMATCHAW(&r->src.addr,
 		    (struct pf_addr *)&h->ip6_src, AF_INET6,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr,
 		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else
 			break;
 	}
 
 	if (r == NULL || r->action == PF_NOSCRUB)
 		return (PF_PASS);
 	else {
 		r->packets[dir == PF_OUT]++;
 		r->bytes[dir == PF_OUT] += pd->tot_len;
 	}
 
 	/* Check for illegal packets */
 	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
 		goto drop;
 
 	/*
 	 * Walk the header chain.  off is the offset of the header being
 	 * examined, extoff the offset of the last extension header seen
 	 * (0 if none), proto the type of the header at off.
 	 */
 	extoff = 0;
 	off = sizeof(struct ip6_hdr);
 	proto = h->ip6_nxt;
 	terminal = 0;
 	do {
 		switch (proto) {
 		case IPPROTO_FRAGMENT:
 			goto fragment;
 			break;
 		case IPPROTO_AH:
 		case IPPROTO_ROUTING:
 		case IPPROTO_DSTOPTS:
 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
 			    NULL, AF_INET6))
 				goto shortpkt;
 			extoff = off;
 			/* AH length is in 4-byte units, the others in 8-byte units. */
 			if (proto == IPPROTO_AH)
 				off += (ext.ip6e_len + 2) * 4;
 			else
 				off += (ext.ip6e_len + 1) * 8;
 			proto = ext.ip6e_nxt;
 			break;
 		case IPPROTO_HOPOPTS:
 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
 			    NULL, AF_INET6))
 				goto shortpkt;
 			extoff = off;
 			optend = off + (ext.ip6e_len + 1) * 8;
 			ooff = off + sizeof(ext);
 			/* Walk the options between ooff and optend. */
 			do {
 				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
 				    sizeof(opt.ip6o_type), NULL, NULL,
 				    AF_INET6))
 					goto shortpkt;
 				/* Pad1 is a single byte without a length field. */
 				if (opt.ip6o_type == IP6OPT_PAD1) {
 					ooff++;
 					continue;
 				}
 				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
 				    NULL, NULL, AF_INET6))
 					goto shortpkt;
 				/* The option must not run past the header. */
 				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
 					goto drop;
 				switch (opt.ip6o_type) {
 				case IP6OPT_JUMBO:
 					/*
 					 * Jumbo option requires ip6_plen == 0,
 					 * a length above IPV6_MAXPACKET, and
 					 * must match the mbuf packet length.
 					 */
 					if (h->ip6_plen != 0)
 						goto drop;
 					if (!pf_pull_hdr(m, ooff, &jumbo,
 					    sizeof(jumbo), NULL, NULL,
 					    AF_INET6))
 						goto shortpkt;
 					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
 					    sizeof(jumbolen));
 					jumbolen = ntohl(jumbolen);
 					if (jumbolen <= IPV6_MAXPACKET)
 						goto drop;
 					if (sizeof(struct ip6_hdr) + jumbolen !=
 					    m->m_pkthdr.len)
 						goto drop;
 					break;
 				default:
 					break;
 				}
 				ooff += sizeof(opt) + opt.ip6o_len;
 			} while (ooff < optend);
 
 			off = optend;
 			proto = ext.ip6e_nxt;
 			break;
 		default:
 			/* Any other header type ends the walk. */
 			terminal = 1;
 			break;
 		}
 	} while (!terminal);
 
 	/* jumbo payload option must be present, or plen > 0 */
 	if (ntohs(h->ip6_plen) == 0)
 		plen = jumbolen;
 	else
 		plen = ntohs(h->ip6_plen);
 	if (plen == 0)
 		goto drop;
 	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
 		goto shortpkt;
 
 	pf_scrub_ip6(&m, r->min_ttl);
 
 	return (PF_PASS);
 
  fragment:
 	/* Jumbo payload packets cannot be fragmented. */
 	plen = ntohs(h->ip6_plen);
 	if (plen == 0 || jumbolen)
 		goto drop;
 	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
 		goto shortpkt;
 
 	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
 		goto shortpkt;
 
 	/* Offset now points to data portion. */
 	off += sizeof(frag);
 
 	/* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */
 	if (pf_reassemble6(m0, h, &frag, off, extoff, dir, reason) != PF_PASS)
 		return (PF_DROP);
 	m = *m0;
 	if (m == NULL)
 		return (PF_DROP);
 
 	pd->flags |= PFDESC_IP_REAS;
 	return (PF_PASS);
 
  shortpkt:
 	/* A header or the declared payload extends past the mbuf data. */
 	REASON_SET(reason, PFRES_SHORT);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  drop:
 	REASON_SET(reason, PFRES_NORM);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 }
 #endif /* INET6 */
 
 /*
  * Apply the first matching scrub rule to a TCP segment.  Segments with
  * illegal flag combinations or a too-short header length are dropped;
  * otherwise FIN is stripped from SYN segments, the reserved bits are
  * cleared, an urgent pointer without TH_URG is zeroed, and
  * pf_normalize_tcpopt() is run when the rule sets max_mss.  If anything
  * was rewritten the TCP header is copied back into the mbuf at off.
  *
  * dir - PF_IN or PF_OUT.
  * kif - interface the packet is matched against.
  * m   - packet; off is the offset of the TCP header within it.
  * pd  - packet descriptor supplying the TCP header, address family,
  *       addresses and protocol.
  * ipoff and h are not used by this function.
  *
  * Returns PF_PASS when no rule matches, the rule is PF_NOSCRUB, or the
  * segment is acceptable; PF_DROP otherwise.
  */
 int
 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
     int off, void *h, struct pf_pdesc *pd)
 {
 	struct pf_rule	*r, *rm = NULL;
 	struct tcphdr	*th = pd->hdr.tcp;
 	int		 rewrite = 0;
 	u_short		 reason;
 	u_int8_t	 flags;
 	sa_family_t	 af = pd->af;
 
 	PF_RULES_RASSERT();
 
 	/*
 	 * Find the first scrub rule matching this segment, using skip
 	 * steps.  On a match rm == r; otherwise rm stays NULL.
 	 */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
 			    r->src.port[0], r->src.port[1], th->th_sport))
 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 			    r->dst.port[0], r->dst.port[1], th->th_dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
 			    pf_osfp_fingerprint(pd, m, off, th),
 			    r->os_fingerprint))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			rm = r;
 			break;
 		}
 	}
 
 	if (rm == NULL || rm->action == PF_NOSCRUB)
 		return (PF_PASS);
 	else {
 		r->packets[dir == PF_OUT]++;
 		r->bytes[dir == PF_OUT] += pd->tot_len;
 	}
 
 	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
 		pd->flags |= PFDESC_TCP_NORM;
 
 	/* Work on a copy of the flags; th is only updated further down. */
 	flags = th->th_flags;
 	if (flags & TH_SYN) {
 		/* Illegal packet */
 		if (flags & TH_RST)
 			goto tcp_drop;
 
 		if (flags & TH_FIN)
 			flags &= ~TH_FIN;
 	} else {
 		/* Illegal packet */
 		if (!(flags & (TH_ACK|TH_RST)))
 			goto tcp_drop;
 	}
 
 	if (!(flags & TH_ACK)) {
 		/* These flags are only valid if ACK is set */
 		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
 			goto tcp_drop;
 	}
 
 	/* Check for illegal header length */
 	if (th->th_off < (sizeof(struct tcphdr) >> 2))
 		goto tcp_drop;
 
 	/* If flags changed, or reserved data set, then adjust */
 	if (flags != th->th_flags || th->th_x2 != 0) {
 		u_int16_t	ov, nv;
 
 		/* Snapshot the 16-bit word following th_ack before and after. */
 		ov = *(u_int16_t *)(&th->th_ack + 1);
 		th->th_flags = flags;
 		th->th_x2 = 0;
 		nv = *(u_int16_t *)(&th->th_ack + 1);
 
 		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
 		rewrite = 1;
 	}
 
 	/* Remove urgent pointer, if TH_URG is not set */
 	if (!(flags & TH_URG) && th->th_urp) {
 		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
 		th->th_urp = 0;
 		rewrite = 1;
 	}
 
 	/* Process options */
 	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
 		rewrite = 1;
 
 	/* copy back packet headers if we sanitized */
 	if (rewrite)
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 
 	return (PF_PASS);
 
  tcp_drop:
 	REASON_SET(&reason, PFRES_NORM);
 	/*
 	 * Log with the packet's own address family: this function handles
 	 * both IPv4 and IPv6 segments, so AF_INET must not be hard-coded.
 	 */
 	if (rm != NULL && r->log)
 		PFLOG_PACKET(kif, m, af, dir, reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 }
 
 /*
  * Allocate the scrub state for src and seed it from the given segment:
  * the IP TTL / IPv6 hop limit is always recorded, and for SYN segments
  * carrying a TCP timestamp option the timestamp fields are recorded too.
  *
  * Returns 1 if the scrub state cannot be allocated, 0 otherwise.
  * dst is not used by this function.
  */
 int
 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
 {
 	u_int8_t	 tcpbuf[60];
 	u_int8_t	*cp;
 	u_int32_t	 ts_val, ts_ecr;
 	int		 left, step;
 
 	KASSERT((src->scrub == NULL),
 	    ("pf_normalize_tcp_init: src->scrub != NULL"));
 
 	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
 	if (src->scrub == NULL)
 		return (1);
 
 	/* Remember the TTL / hop limit carried by this packet. */
 #ifdef INET
 	if (pd->af == AF_INET)
 		src->scrub->pfss_ttl = mtod(m, struct ip *)->ip_ttl;
 #endif /* INET */
 #ifdef INET6
 	if (pd->af == AF_INET6)
 		src->scrub->pfss_ttl = mtod(m, struct ip6_hdr *)->ip6_hlim;
 #endif /* INET6 */
 
 	/*
 	 * Everything below is only started on the first segment of a
 	 * connection, and must set an enabled bit in pfss_flags.
 	 */
 	if (!(th->th_flags & TH_SYN))
 		return (0);
 
 	/* Done if there are no options or the header cannot be pulled. */
 	if (th->th_off <= (sizeof(struct tcphdr) >> 2) || src->scrub == NULL ||
 	    !pf_pull_hdr(m, off, tcpbuf, th->th_off << 2, NULL, NULL, pd->af))
 		return (0);
 
 	/* Scan the option bytes that follow the fixed TCP header. */
 	cp = tcpbuf + sizeof(struct tcphdr);
 	left = (th->th_off << 2) - sizeof(struct tcphdr);
 	for (; left >= TCPOLEN_TIMESTAMP; cp += step, left -= step) {
 		/* EOL and NOP are single-byte options. */
 		if (*cp == TCPOPT_EOL || *cp == TCPOPT_NOP) {
 			step = 1;
 			continue;
 		}
 
 		if (*cp == TCPOPT_TIMESTAMP && cp[1] >= TCPOLEN_TIMESTAMP) {
 			src->scrub->pfss_flags |= PFSS_TIMESTAMP;
 			src->scrub->pfss_ts_mod = htonl(arc4random());
 
 			/* note PFSS_PAWS not set yet */
 			memcpy(&ts_val, &cp[2], sizeof(u_int32_t));
 			memcpy(&ts_ecr, &cp[6], sizeof(u_int32_t));
 			src->scrub->pfss_tsval0 = ntohl(ts_val);
 			src->scrub->pfss_tsval = ntohl(ts_val);
 			src->scrub->pfss_tsecr = ntohl(ts_ecr);
 			getmicrouptime(&src->scrub->pfss_last);
 		}
 
 		/* Advance by the option length, but never by less than 2. */
 		step = MAX(cp[1], 2);
 	}
 
 	return (0);
 }
 
 void
 pf_normalize_tcp_cleanup(struct pf_state *state)
 {
 	if (state->src.scrub)
 		uma_zfree(V_pf_state_scrub_z, state->src.scrub);
 	if (state->dst.scrub)
 		uma_zfree(V_pf_state_scrub_z, state->dst.scrub);
 
 	/* Someday... flush the TCP segment reassembly descriptors. */
 }
 
 int
 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
     u_short *reason, struct tcphdr *th, struct pf_state *state,
     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
 {
 	struct timeval uptime;
 	u_int32_t tsval, tsecr;
 	u_int tsval_from_last;
 	u_int8_t hdr[60];
 	u_int8_t *opt;
 	int copyback = 0;
 	int got_ts = 0;
 
 	KASSERT((src->scrub || dst->scrub),
 	    ("%s: src->scrub && dst->scrub!", __func__));
 
 	/*
 	 * Enforce the minimum TTL seen for this connection.  Negate a common
 	 * technique to evade an intrusion detection system and confuse
 	 * firewall state code.
 	 */
 	switch (pd->af) {
 #ifdef INET
 	case AF_INET: {
 		if (src->scrub) {
 			struct ip *h = mtod(m, struct ip *);
 			if (h->ip_ttl > src->scrub->pfss_ttl)
 				src->scrub->pfss_ttl = h->ip_ttl;
 			h->ip_ttl = src->scrub->pfss_ttl;
 		}
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		if (src->scrub) {
 			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
 			if (h->ip6_hlim > src->scrub->pfss_ttl)
 				src->scrub->pfss_ttl = h->ip6_hlim;
 			h->ip6_hlim = src->scrub->pfss_ttl;
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
 	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
 	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
 	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
 		/* Diddle with TCP options */
 		int hlen;
 		opt = hdr + sizeof(struct tcphdr);
 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
 		while (hlen >= TCPOLEN_TIMESTAMP) {
 			switch (*opt) {
 			case TCPOPT_EOL:	/* FALLTHROUGH */
 			case TCPOPT_NOP:
 				opt++;
 				hlen--;
 				break;
 			case TCPOPT_TIMESTAMP:
 				/* Modulate the timestamps.  Can be used for
 				 * NAT detection, OS uptime determination or
 				 * reboot detection.
 				 */
 
 				if (got_ts) {
 					/* Huh?  Multiple timestamps!? */
 					if (V_pf_status.debug >= PF_DEBUG_MISC) {
 						DPFPRINTF(("multiple TS??"));
 						pf_print_state(state);
 						printf("\n");
 					}
 					REASON_SET(reason, PFRES_TS);
 					return (PF_DROP);
 				}
 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
 					memcpy(&tsval, &opt[2],
 					    sizeof(u_int32_t));
 					if (tsval && src->scrub &&
 					    (src->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsval = ntohl(tsval);
 						pf_change_a(&opt[2],
 						    &th->th_sum,
 						    htonl(tsval +
 						    src->scrub->pfss_ts_mod),
 						    0);
 						copyback = 1;
 					}
 
 					/* Modulate TS reply iff valid (!0) */
 					memcpy(&tsecr, &opt[6],
 					    sizeof(u_int32_t));
 					if (tsecr && dst->scrub &&
 					    (dst->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsecr = ntohl(tsecr)
 						    - dst->scrub->pfss_ts_mod;
 						pf_change_a(&opt[6],
 						    &th->th_sum, htonl(tsecr),
 						    0);
 						copyback = 1;
 					}
 					got_ts = 1;
 				}
 				/* FALLTHROUGH */
 			default:
 				hlen -= MAX(opt[1], 2);
 				opt += MAX(opt[1], 2);
 				break;
 			}
 		}
 		if (copyback) {
 			/* Copyback the options, caller copys back header */
 			*writeback = 1;
 			m_copyback(m, off + sizeof(struct tcphdr),
 			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
 			    sizeof(struct tcphdr));
 		}
 	}
 
 
 	/*
 	 * Must invalidate PAWS checks on connections idle for too long.
 	 * The fastest allowed timestamp clock is 1ms.  That turns out to
 	 * be about 24 days before it wraps.  XXX Right now our lowerbound
 	 * TS echo check only works for the first 12 days of a connection
 	 * when the TS has exhausted half its 32bit space
 	 */
 #define TS_MAX_IDLE	(24*24*60*60)
 #define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
 
 	getmicrouptime(&uptime);
 	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
 	    time_uptime - state->creation > TS_MAX_CONN))  {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			DPFPRINTF(("src idled out of PAWS\n"));
 			pf_print_state(state);
 			printf("\n");
 		}
 		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
 		    | PFSS_PAWS_IDLED;
 	}
 	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
 	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			DPFPRINTF(("dst idled out of PAWS\n"));
 			pf_print_state(state);
 			printf("\n");
 		}
 		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
 		    | PFSS_PAWS_IDLED;
 	}
 
 	if (got_ts && src->scrub && dst->scrub &&
 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
 		/* Validate that the timestamps are "in-window".
 		 * RFC1323 describes TCP Timestamp options that allow
 		 * measurement of RTT (round trip time) and PAWS
 		 * (protection against wrapped sequence numbers).  PAWS
 		 * gives us a set of rules for rejecting packets on
 		 * long fat pipes (packets that were somehow delayed
 		 * in transit longer than the time it took to send the
 		 * full TCP sequence space of 4Gb).  We can use these
 		 * rules and infer a few others that will let us treat
 		 * the 32bit timestamp and the 32bit echoed timestamp
 		 * as sequence numbers to prevent a blind attacker from
 		 * inserting packets into a connection.
 		 *
 		 * RFC1323 tells us:
 		 *  - The timestamp on this packet must be greater than
 		 *    or equal to the last value echoed by the other
 		 *    endpoint.  The RFC says those will be discarded
 		 *    since it is a dup that has already been acked.
 		 *    This gives us a lowerbound on the timestamp.
 		 *        timestamp >= other last echoed timestamp
 		 *  - The timestamp will be less than or equal to
 		 *    the last timestamp plus the time between the
 		 *    last packet and now.  The RFC defines the max
 		 *    clock rate as 1ms.  We will allow clocks to be
 		 *    up to 10% fast and will allow a total difference
 		 *    of 30 seconds due to a route change.  And this
 		 *    gives us an upperbound on the timestamp.
 		 *        timestamp <= last timestamp + max ticks
 		 *    We have to be careful here.  Windows will send an
 		 *    initial timestamp of zero and then initialize it
 		 *    to a random value after the 3whs; presumably to
 		 *    avoid a DoS by having to call an expensive RNG
 		 *    during a SYN flood.  Proof MS has at least one
 		 *    good security geek.
 		 *
 		 *  - The TCP timestamp option must also echo the other
 		 *    endpoints timestamp.  The timestamp echoed is the
 		 *    one carried on the earliest unacknowledged segment
 		 *    on the left edge of the sequence window.  The RFC
 		 *    states that the host will reject any echoed
 		 *    timestamps that were larger than any ever sent.
 		 *    This gives us an upperbound on the TS echo.
 		 *        tsecr <= largest_tsval
 		 *  - The lowerbound on the TS echo is a little more
 		 *    tricky to determine.  The other endpoint's echoed
 		 *    values will not decrease.  But there may be
 		 *    network conditions that re-order packets and
 		 *    cause our view of them to decrease.  For now the
 		 *    only lowerbound we can safely determine is that
 		 *    the TS echo will never be less than the original
 		 *    TS.  XXX There is probably a better lowerbound.
 		 *    Remove TS_MAX_CONN with better lowerbound check.
 		 *        tsecr >= other original TS
 		 *
 		 * It is also important to note that the fastest
 		 * timestamp clock of 1ms will wrap its 32bit space in
 		 * 24 days.  So we just disable TS checking after 24
 		 * days of idle time.  We actually must use a 12d
 		 * connection limit until we can come up with a better
 		 * lowerbound to the TS echo check.
 		 */
 		struct timeval delta_ts;
 		int ts_fudge;
 
 
 		/*
 		 * PFTM_TS_DIFF is how many seconds of leeway to allow
 		 * a host's timestamp.  This can happen if the previous
 		 * packet got delayed in transit for much longer than
 		 * this packet.
 		 */
 		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
 			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
 
 		/* Calculate max ticks since the last timestamp */
 #define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
 #define TS_MICROSECS	1000000		/* microseconds per second */
 		delta_ts = uptime;
 		timevalsub(&delta_ts, &src->scrub->pfss_last);
 		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
 		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
 
 		if ((src->state >= TCPS_ESTABLISHED &&
 		    dst->state >= TCPS_ESTABLISHED) &&
 		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
 		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
 		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
 		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
 			/* Bad RFC1323 implementation or an insertion attack.
 			 *
 			 * - Solaris 2.6 and 2.7 are known to send another ACK
 			 *   after the FIN,FIN|ACK,ACK closing that carries
 			 *   an old timestamp.
 			 */
 
 			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
 			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
 			    SEQ_GT(tsval, src->scrub->pfss_tsval +
 			    tsval_from_last) ? '1' : ' ',
 			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
 			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
 			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
 			    "idle: %jus %lums\n",
 			    tsval, tsecr, tsval_from_last,
 			    (uintmax_t)delta_ts.tv_sec,
 			    delta_ts.tv_usec / 1000));
 			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
 			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
 			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
 			    "\n", dst->scrub->pfss_tsval,
 			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 			REASON_SET(reason, PFRES_TS);
 			return (PF_DROP);
 		}
 
 		/* XXX I'd really like to require tsecr but it's optional */
 
 	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
 	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
 	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
 	    src->scrub && dst->scrub &&
 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
 		/* Didn't send a timestamp.  Timestamps aren't really useful
 		 * when:
 		 *  - connection opening or closing (often not even sent).
 		 *    but we must not let an attacker to put a FIN on a
 		 *    data packet to sneak it through our ESTABLISHED check.
 		 *  - on a TCP reset.  RFC suggests not even looking at TS.
 		 *  - on an empty ACK.  The TS will not be echoed so it will
 		 *    probably not help keep the RTT calculation in sync and
 		 *    there isn't as much danger when the sequence numbers
 		 *    got wrapped.  So some stacks don't include TS on empty
 		 *    ACKs :-(
 		 *
 		 * To minimize the disruption to mostly RFC1323 conformant
 		 * stacks, we will only require timestamps on data packets.
 		 *
 		 * And what do ya know, we cannot require timestamps on data
 		 * packets.  There appear to be devices that do legitimate
 		 * TCP connection hijacking.  There are HTTP devices that allow
 		 * a 3whs (with timestamps) and then buffer the HTTP request.
 		 * If the intermediate device has the HTTP response cache, it
 		 * will spoof the response but not bother timestamping its
 		 * packets.  So we can look for the presence of a timestamp in
 		 * the first data packet and if there, require it in all future
 		 * packets.
 		 */
 
 		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
 			/*
 			 * Hey!  Someone tried to sneak a packet in.  Or the
 			 * stack changed its RFC1323 behavior?!?!
 			 */
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				DPFPRINTF(("Did not receive expected RFC1323 "
 				    "timestamp\n"));
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 			REASON_SET(reason, PFRES_TS);
 			return (PF_DROP);
 		}
 	}
 
 
 	/*
 	 * We will note if a host sends his data packets with or without
 	 * timestamps.  And require all data packets to contain a timestamp
 	 * if the first does.  PAWS implicitly requires that all data packets be
 	 * timestamped.  But I think there are middle-man devices that hijack
 	 * TCP streams immediately after the 3whs and don't timestamp their
 	 * packets (seen in a WWW accelerator or cache).
 	 */
 	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
 	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
 		if (got_ts)
 			src->scrub->pfss_flags |= PFSS_DATA_TS;
 		else {
 			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
 			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
 			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
 				/* Don't warn if other host rejected RFC1323 */
 				DPFPRINTF(("Broken RFC1323 stack did not "
 				    "timestamp data packet. Disabled PAWS "
 				    "security.\n"));
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 		}
 	}
 
 
 	/*
 	 * Update PAWS values
 	 */
 	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
 	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
 		getmicrouptime(&src->scrub->pfss_last);
 		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
 		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
 			src->scrub->pfss_tsval = tsval;
 
 		if (tsecr) {
 			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
 			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
 				src->scrub->pfss_tsecr = tsecr;
 
 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
 			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
 			    src->scrub->pfss_tsval0 == 0)) {
 				/* tsval0 MUST be the lowest timestamp */
 				src->scrub->pfss_tsval0 = tsval;
 			}
 
 			/* Only fully initialized after a TS gets echoed */
 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
 				src->scrub->pfss_flags |= PFSS_PAWS;
 		}
 	}
 
 	/* I have a dream....  TCP segment reassembly.... */
 	return (0);
 }
 
 static int
 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
     int off, sa_family_t af)
 {
 	u_int16_t	*mss;
 	int		 thoff;
 	int		 opt, cnt, optlen = 0;
 	int		 rewrite = 0;
 	u_char		 opts[TCP_MAXOLEN];
 	u_char		*optp = opts;
 
 	thoff = th->th_off << 2;
 	cnt = thoff - sizeof(struct tcphdr);
 
 	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
 	    NULL, NULL, af))
 		return (rewrite);
 
 	for (; cnt > 0; cnt -= optlen, optp += optlen) {
 		opt = optp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			if (cnt < 2)
 				break;
 			optlen = optp[1];
 			if (optlen < 2 || optlen > cnt)
 				break;
 		}
 		switch (opt) {
 		case TCPOPT_MAXSEG:
 			mss = (u_int16_t *)(optp + 2);
 			if ((ntohs(*mss)) > r->max_mss) {
 				th->th_sum = pf_cksum_fixup(th->th_sum,
 				    *mss, htons(r->max_mss), 0);
 				*mss = htons(r->max_mss);
 				rewrite = 1;
 			}
 			break;
 		default:
 			break;
 		}
 	}
 
 	if (rewrite)
 		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
 
 	return (rewrite);
 }
 
 #ifdef INET
 static void
 pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
 {
 	struct mbuf		*m = *m0;
 	struct ip		*h = mtod(m, struct ip *);
 
 	/* Clear IP_DF if no-df was requested */
 	if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(~IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* Enforce a minimum ttl, may cause endless packet loops */
 	if (min_ttl && h->ip_ttl < min_ttl) {
 		u_int16_t ip_ttl = h->ip_ttl;
 
 		h->ip_ttl = min_ttl;
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
 	}
 
 	/* Enforce tos */
 	if (flags & PFRULE_SET_TOS) {
 		u_int16_t	ov, nv;
 
 		ov = *(u_int16_t *)h;
 		h->ip_tos = tos;
 		nv = *(u_int16_t *)h;
 
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
 	}
 
 	/* random-id, but not for fragments */
 	if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
 		uint16_t ip_id = h->ip_id;
 
 		ip_fillid(h);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
 	}
 }
 #endif /* INET */
 
 #ifdef INET6
 static void
 pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
 {
 	struct mbuf		*m = *m0;
 	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
 
 	/* Enforce a minimum ttl, may cause endless packet loops */
 	if (min_ttl && h->ip6_hlim < min_ttl)
 		h->ip6_hlim = min_ttl;
 }
 #endif
Index: projects/ifnet/sys/powerpc/powerpc/trap.c
===================================================================
--- projects/ifnet/sys/powerpc/powerpc/trap.c	(revision 281172)
+++ projects/ifnet/sys/powerpc/powerpc/trap.c	(revision 281173)
@@ -1,816 +1,816 @@
 /*-
  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
  * Copyright (C) 1995, 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: trap.c,v 1.58 2002/03/04 04:07:35 dbj Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kdb.h>
 #include <sys/proc.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pioctl.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
 #include <sys/syscall.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/uio.h>
 #include <sys/signalvar.h>
 #include <sys/vmmeter.h>
 
 #include <security/audit/audit.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 
 #include <machine/_inttypes.h>
 #include <machine/altivec.h>
 #include <machine/cpu.h>
 #include <machine/db_machdep.h>
 #include <machine/fpu.h>
 #include <machine/frame.h>
 #include <machine/pcb.h>
 #include <machine/pmap.h>
 #include <machine/psl.h>
 #include <machine/trap.h>
 #include <machine/spr.h>
 #include <machine/sr.h>
 
 #define	FAULTBUF_LR	0
 #define	FAULTBUF_R1	1
 #define	FAULTBUF_R2	2
 #define	FAULTBUF_CR	3
-#define	FAULTBUF_R13	6
+#define	FAULTBUF_R13	4
 
 static void	trap_fatal(struct trapframe *frame);
 static void	printtrap(u_int vector, struct trapframe *frame, int isfatal,
 		    int user);
 static int	trap_pfault(struct trapframe *frame, int user);
 static int	fix_unaligned(struct thread *td, struct trapframe *frame);
 static int	handle_onfault(struct trapframe *frame);
 static void	syscall(struct trapframe *frame);
 
 #ifdef __powerpc64__
        void	handle_kernel_slb_spill(int, register_t, register_t);
 static int	handle_user_slb_spill(pmap_t pm, vm_offset_t addr);
 extern int	n_slbs;
 #endif
 
 struct powerpc_exception {
 	u_int	vector;
 	char	*name;
 };
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 
 int (*dtrace_invop_jump_addr)(struct trapframe *);
 #endif
 
 static struct powerpc_exception powerpc_exceptions[] = {
 	{ EXC_CRIT,	"critical input" },
 	{ EXC_RST,	"system reset" },
 	{ EXC_MCHK,	"machine check" },
 	{ EXC_DSI,	"data storage interrupt" },
 	{ EXC_DSE,	"data segment exception" },
 	{ EXC_ISI,	"instruction storage interrupt" },
 	{ EXC_ISE,	"instruction segment exception" },
 	{ EXC_EXI,	"external interrupt" },
 	{ EXC_ALI,	"alignment" },
 	{ EXC_PGM,	"program" },
 	{ EXC_FPU,	"floating-point unavailable" },
 	{ EXC_APU,	"auxiliary proc unavailable" },
 	{ EXC_DECR,	"decrementer" },
 	{ EXC_FIT,	"fixed-interval timer" },
 	{ EXC_WDOG,	"watchdog timer" },
 	{ EXC_SC,	"system call" },
 	{ EXC_TRC,	"trace" },
 	{ EXC_FPA,	"floating-point assist" },
 	{ EXC_DEBUG,	"debug" },
 	{ EXC_PERF,	"performance monitoring" },
 	{ EXC_VEC,	"altivec unavailable" },
 	{ EXC_VSX,	"vsx unavailable" },
 	{ EXC_ITMISS,	"instruction tlb miss" },
 	{ EXC_DLMISS,	"data load tlb miss" },
 	{ EXC_DSMISS,	"data store tlb miss" },
 	{ EXC_BPT,	"instruction breakpoint" },
 	{ EXC_SMI,	"system management" },
 	{ EXC_VECAST_G4,	"altivec assist" },
 	{ EXC_THRM,	"thermal management" },
 	{ EXC_RUNMODETRC,	"run mode/trace" },
 	{ EXC_LAST,	NULL }
 };
 
 static const char *
 trapname(u_int vector)
 {
 	struct	powerpc_exception *pe;
 
 	for (pe = powerpc_exceptions; pe->vector != EXC_LAST; pe++) {
 		if (pe->vector == vector)
 			return (pe->name);
 	}
 
 	return ("unknown");
 }
 
 void
 trap(struct trapframe *frame)
 {
 	struct thread	*td;
 	struct proc	*p;
 #ifdef KDTRACE_HOOKS
 	uint32_t inst;
 #endif
 	int		sig, type, user;
 	u_int		ucode;
 	ksiginfo_t	ksi;
 
 	PCPU_INC(cnt.v_trap);
 
 	td = curthread;
 	p = td->td_proc;
 
 	type = ucode = frame->exc;
 	sig = 0;
 	user = frame->srr1 & PSL_PR;
 
 	CTR3(KTR_TRAP, "trap: %s type=%s (%s)", td->td_name,
 	    trapname(type), user ? "user" : "kernel");
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * A trap can occur while DTrace executes a probe. Before
 	 * executing the probe, DTrace blocks re-scheduling and sets
 	 * a flag in its per-cpu flags to indicate that it doesn't
 	 * want to fault. On returning from the probe, the no-fault
 	 * flag is cleared and finally re-scheduling is enabled.
 	 *
 	 * If the DTrace kernel module has registered a trap handler,
 	 * call it and if it returns non-zero, assume that it has
 	 * handled the trap and modified the trap frame so that this
 	 * function can return normally.
 	 */
 	if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type) != 0)
 		return;
 #endif
 
 	if (user) {
 		td->td_pticks = 0;
 		td->td_frame = frame;
 		if (td->td_ucred != p->p_ucred)
 			cred_update_thread(td);
 
 		/* User Mode Traps */
 		switch (type) {
 		case EXC_RUNMODETRC:
 		case EXC_TRC:
 			frame->srr1 &= ~PSL_SE;
 			sig = SIGTRAP;
 			ucode = TRAP_TRACE;
 			break;
 
 #ifdef __powerpc64__
 		case EXC_ISE:
 		case EXC_DSE:
 			if (handle_user_slb_spill(&p->p_vmspace->vm_pmap,
 			    (type == EXC_ISE) ? frame->srr0 : frame->dar) != 0){
 				sig = SIGSEGV;
 				ucode = SEGV_MAPERR;
 			}
 			break;
 #endif
 		case EXC_DSI:
 		case EXC_ISI:
 			sig = trap_pfault(frame, 1);
 			if (sig == SIGSEGV)
 				ucode = SEGV_MAPERR;
 			break;
 
 		case EXC_SC:
 			syscall(frame);
 			break;
 
 		case EXC_FPU:
 			KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU,
 			    ("FPU already enabled for thread"));
 			enable_fpu(td);
 			break;
 
 		case EXC_VEC:
 			KASSERT((td->td_pcb->pcb_flags & PCB_VEC) != PCB_VEC,
 			    ("Altivec already enabled for thread"));
 			enable_vec(td);
 			break;
 
 		case EXC_VSX:
 			KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX,
 			    ("VSX already enabled for thread"));
 			if (!(td->td_pcb->pcb_flags & PCB_VEC))
 				enable_vec(td);
 			if (!(td->td_pcb->pcb_flags & PCB_FPU))
 				save_fpu(td);
 			td->td_pcb->pcb_flags |= PCB_VSX;
 			enable_fpu(td);
 			break;
 
 		case EXC_VECAST_G4:
 		case EXC_VECAST_G5:
 			/*
 			 * We get a VPU assist exception for IEEE mode
 			 * vector operations on denormalized floats.
 			 * Emulating this is a giant pain, so for now,
 			 * just switch off IEEE mode and treat them as
 			 * zero.
 			 */
 
 			save_vec(td);
 			td->td_pcb->pcb_vec.vscr |= ALTIVEC_VSCR_NJ;
 			enable_vec(td);
 			break;
 
 		case EXC_ALI:
 			if (fix_unaligned(td, frame) != 0) {
 				sig = SIGBUS;
 				ucode = BUS_ADRALN;
 			}
 			else
 				frame->srr0 += 4;
 			break;
 
 		case EXC_DEBUG:	/* Single stepping */
 			mtspr(SPR_DBSR, mfspr(SPR_DBSR));
 			frame->srr1 &= ~PSL_DE;
 			frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM || DBCR0_IC);
 			sig = SIGTRAP;
 			ucode = TRAP_TRACE;
 			break;
 
 		case EXC_PGM:
 			/* Identify the trap reason */
 #ifdef AIM
 			if (frame->srr1 & EXC_PGM_TRAP) {
 #else
 			if (frame->cpu.booke.esr & ESR_PTR) {
 #endif
 #ifdef KDTRACE_HOOKS
 				inst = fuword32((const void *)frame->srr0);
 				if (inst == 0x0FFFDDDD &&
 				    dtrace_pid_probe_ptr != NULL) {
 					struct reg regs;
 					fill_regs(td, &regs);
 					(*dtrace_pid_probe_ptr)(&regs);
 					break;
 				}
 #endif
  				sig = SIGTRAP;
 				ucode = TRAP_BRKPT;
 			} else {
 				sig = ppc_instr_emulate(frame, td->td_pcb);
 				if (sig == SIGILL) {
 					if (frame->srr1 & EXC_PGM_PRIV)
 						ucode = ILL_PRVOPC;
 					else if (frame->srr1 & EXC_PGM_ILLEGAL)
 						ucode = ILL_ILLOPC;
 				} else if (sig == SIGFPE)
 					ucode = FPE_FLTINV;	/* Punt for now, invalid operation. */
 			}
 			break;
 
 		case EXC_MCHK:
 			/*
 			 * Note that this may not be recoverable for the user
 			 * process, depending on the type of machine check,
 			 * but it at least prevents the kernel from dying.
 			 */
 			sig = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 
 		default:
 			trap_fatal(frame);
 		}
 	} else {
 		/* Kernel Mode Traps */
 
 		KASSERT(cold || td->td_ucred != NULL,
 		    ("kernel trap doesn't have ucred"));
 		switch (type) {
 #ifdef KDTRACE_HOOKS
 		case EXC_PGM:
 			if (frame->srr1 & EXC_PGM_TRAP) {
 				if (*(uint32_t *)frame->srr0 == EXC_DTRACE) {
 					if (dtrace_invop_jump_addr != NULL) {
 						dtrace_invop_jump_addr(frame);
 						return;
 					}
 				}
 			}
 			break;
 #endif
 #ifdef __powerpc64__
 		case EXC_DSE:
 			if ((frame->dar & SEGMENT_MASK) == USER_ADDR) {
 				__asm __volatile ("slbmte %0, %1" ::
 					"r"(td->td_pcb->pcb_cpu.aim.usr_vsid),
 					"r"(USER_SLB_SLBE));
 				return;
 			}
 			break;
 #endif
 		case EXC_DSI:
 			if (trap_pfault(frame, 0) == 0)
  				return;
 			break;
 		case EXC_MCHK:
 			if (handle_onfault(frame))
  				return;
 			break;
 		default:
 			break;
 		}
 		trap_fatal(frame);
 	}
 
 	if (sig != 0) {
 		if (p->p_sysent->sv_transtrap != NULL)
 			sig = (p->p_sysent->sv_transtrap)(sig, type);
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = sig;
 		ksi.ksi_code = (int) ucode; /* XXX, not POSIX */
 		/* ksi.ksi_addr = ? */
 		ksi.ksi_trapno = type;
 		trapsignal(td, &ksi);
 	}
 
 	userret(td, frame);
 }
 
 static void
 trap_fatal(struct trapframe *frame)
 {
 
 	printtrap(frame->exc, frame, 1, (frame->srr1 & PSL_PR));
 #ifdef KDB
 	if ((debugger_on_panic || kdb_active) &&
 	    kdb_trap(frame->exc, 0, frame))
 		return;
 #endif
 	panic("%s trap", trapname(frame->exc));
 }
 
 static void
 printtrap(u_int vector, struct trapframe *frame, int isfatal, int user)
 {
 
 	printf("\n");
 	printf("%s %s trap:\n", isfatal ? "fatal" : "handled",
 	    user ? "user" : "kernel");
 	printf("\n");
 	printf("   exception       = 0x%x (%s)\n", vector, trapname(vector));
 	switch (vector) {
 	case EXC_DTMISS:
 	case EXC_DSE:
 	case EXC_DSI:
 		printf("   virtual address = 0x%" PRIxPTR "\n", frame->dar);
 		printf("   dsisr           = 0x%" PRIxPTR "\n",
 		    frame->cpu.aim.dsisr);
 		break;
 	case EXC_ITMISS:
 	case EXC_ISE:
 	case EXC_ISI:
 		printf("   virtual address = 0x%" PRIxPTR "\n", frame->srr0);
 		break;
 	}
 	printf("   srr0            = 0x%" PRIxPTR "\n", frame->srr0);
 	printf("   srr1            = 0x%" PRIxPTR "\n", frame->srr1);
 	printf("   lr              = 0x%" PRIxPTR "\n", frame->lr);
 	printf("   curthread       = %p\n", curthread);
 	if (curthread != NULL)
 		printf("          pid = %d, comm = %s\n",
 		    curthread->td_proc->p_pid, curthread->td_name);
 	printf("\n");
 }
 
 /*
  * Handles a fatal fault when we have onfault state to recover.  Returns
  * non-zero if there was onfault recovery state available.
  */
 static int
 handle_onfault(struct trapframe *frame)
 {
 	struct		thread *td;
 	faultbuf	*fb;
 
 	td = curthread;
 	fb = td->td_pcb->pcb_onfault;
 	if (fb != NULL) {
 		frame->srr0 = (*fb)[FAULTBUF_LR];
 		frame->fixreg[1] = (*fb)[FAULTBUF_R1];
 		frame->fixreg[2] = (*fb)[FAULTBUF_R2];
 		frame->fixreg[3] = 1;
 		frame->cr = (*fb)[FAULTBUF_CR];
 		bcopy(&(*fb)[FAULTBUF_R13], &frame->fixreg[13],
 		    19 * sizeof(register_t));
 		return (1);
 	}
 	return (0);
 }
 
 int
 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 {
 	struct proc *p;
 	struct trapframe *frame;
 	caddr_t	params;
 	size_t argsz;
 	int error, n, i;
 
 	p = td->td_proc;
 	frame = td->td_frame;
 
 	sa->code = frame->fixreg[0];
 	params = (caddr_t)(frame->fixreg + FIRSTARG);
 	n = NARGREG;
 
 	if (sa->code == SYS_syscall) {
 		/*
 		 * code is first argument,
 		 * followed by actual args.
 		 */
 		sa->code = *(register_t *) params;
 		params += sizeof(register_t);
 		n -= 1;
 	} else if (sa->code == SYS___syscall) {
 		/*
 		 * Like syscall, but code is a quad,
 		 * so as to maintain quad alignment
 		 * for the rest of the args.
 		 */
 		if (SV_PROC_FLAG(p, SV_ILP32)) {
 			params += sizeof(register_t);
 			sa->code = *(register_t *) params;
 			params += sizeof(register_t);
 			n -= 2;
 		} else {
 			sa->code = *(register_t *) params;
 			params += sizeof(register_t);
 			n -= 1;
 		}
 	}
 
  	if (p->p_sysent->sv_mask)
 		sa->code &= p->p_sysent->sv_mask;
 	if (sa->code >= p->p_sysent->sv_size)
 		sa->callp = &p->p_sysent->sv_table[0];
 	else
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 
 	sa->narg = sa->callp->sy_narg;
 
 	if (SV_PROC_FLAG(p, SV_ILP32)) {
 		argsz = sizeof(uint32_t);
 
 		for (i = 0; i < n; i++)
 			sa->args[i] = ((u_register_t *)(params))[i] &
 			    0xffffffff;
 	} else {
 		argsz = sizeof(uint64_t);
 
 		for (i = 0; i < n; i++)
 			sa->args[i] = ((u_register_t *)(params))[i];
 	}
 
 	if (sa->narg > n)
 		error = copyin(MOREARGS(frame->fixreg[1]), sa->args + n,
 			       (sa->narg - n) * argsz);
 	else
 		error = 0;
 
 #ifdef __powerpc64__
 	if (SV_PROC_FLAG(p, SV_ILP32) && sa->narg > n) {
 		/* Expand the size of arguments copied from the stack */
 
 		for (i = sa->narg; i >= n; i--)
 			sa->args[i] = ((uint32_t *)(&sa->args[n]))[i-n];
 	}
 #endif
 
 	if (error == 0) {
 		td->td_retval[0] = 0;
 		td->td_retval[1] = frame->fixreg[FIRSTARG + 1];
 	}
 	return (error);
 }
 
 #include "../../kern/subr_syscall.c"
 
 void
 syscall(struct trapframe *frame)
 {
 	struct thread *td;
 	struct syscall_args sa;
 	int error;
 
 	td = curthread;
 	td->td_frame = frame;
 
 #ifdef __powerpc64__
 	/*
 	 * Speculatively restore last user SLB segment, which we know is
 	 * invalid already, since we are likely to do copyin()/copyout().
 	 */
 	__asm __volatile ("slbmte %0, %1; isync" ::
             "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
 #endif
 
 	error = syscallenter(td, &sa);
 	syscallret(td, error, &sa);
 }
 
 #ifdef __powerpc64__
 /* Handle kernel SLB faults -- runs in real mode, all seat belts off */
 void
 handle_kernel_slb_spill(int type, register_t dar, register_t srr0)
 {
 	struct slb *slbcache;
 	uint64_t slbe, slbv;
 	uint64_t esid, addr;
 	int i;
 
 	addr = (type == EXC_ISE) ? srr0 : dar;
 	slbcache = PCPU_GET(slb);
 	esid = (uintptr_t)addr >> ADDR_SR_SHFT;
 	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
 	
 	/* See if the hardware flushed this somehow (can happen in LPARs) */
 	for (i = 0; i < n_slbs; i++)
 		if (slbcache[i].slbe == (slbe | (uint64_t)i))
 			return;
 
 	/* Not in the map, needs to actually be added */
 	slbv = kernel_va_to_slbv(addr);
 	if (slbcache[USER_SLB_SLOT].slbe == 0) {
 		for (i = 0; i < n_slbs; i++) {
 			if (i == USER_SLB_SLOT)
 				continue;
 			if (!(slbcache[i].slbe & SLBE_VALID))
 				goto fillkernslb;
 		}
 
 		if (i == n_slbs)
 			slbcache[USER_SLB_SLOT].slbe = 1;
 	}
 
 	/* Sacrifice a random SLB entry that is not the user entry */
 	i = mftb() % n_slbs;
 	if (i == USER_SLB_SLOT)
 		i = (i+1) % n_slbs;
 
 fillkernslb:
 	/* Write new entry */
 	slbcache[i].slbv = slbv;
 	slbcache[i].slbe = slbe | (uint64_t)i;
 
 	/* Trap handler will restore from cache on exit */
 }
 
 static int 
 handle_user_slb_spill(pmap_t pm, vm_offset_t addr)
 {
 	struct slb *user_entry;
 	uint64_t esid;
 	int i;
 
 	esid = (uintptr_t)addr >> ADDR_SR_SHFT;
 
 	PMAP_LOCK(pm);
 	user_entry = user_va_to_slb_entry(pm, addr);
 
 	if (user_entry == NULL) {
 		/* allocate_vsid auto-spills it */
 		(void)allocate_user_vsid(pm, esid, 0);
 	} else {
 		/*
 		 * Check that another CPU has not already mapped this.
 		 * XXX: Per-thread SLB caches would be better.
 		 */
 		for (i = 0; i < pm->pm_slb_len; i++)
 			if (pm->pm_slb[i] == user_entry)
 				break;
 
 		if (i == pm->pm_slb_len)
 			slb_insert_user(pm, user_entry);
 	}
 	PMAP_UNLOCK(pm);
 
 	return (0);
 }
 #endif
 
 static int
 trap_pfault(struct trapframe *frame, int user)
 {
 	vm_offset_t	eva, va;
 	struct		thread *td;
 	struct		proc *p;
 	vm_map_t	map;
 	vm_prot_t	ftype;
 	int		rv;
 #ifdef AIM
 	register_t	user_sr;
 #endif
 
 	td = curthread;
 	p = td->td_proc;
 	if (frame->exc == EXC_ISI) {
 		eva = frame->srr0;
 		ftype = VM_PROT_EXECUTE;
 		if (frame->srr1 & SRR1_ISI_PFAULT)
 			ftype |= VM_PROT_READ;
 	} else {
 		eva = frame->dar;
 #ifdef BOOKE
 		if (frame->cpu.booke.esr & ESR_ST)
 #else
 		if (frame->cpu.aim.dsisr & DSISR_STORE)
 #endif
 			ftype = VM_PROT_WRITE;
 		else
 			ftype = VM_PROT_READ;
 	}
 
 	if (user) {
 		KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace  NULL"));
 		map = &p->p_vmspace->vm_map;
 	} else {
 #ifdef BOOKE
 		if (eva < VM_MAXUSER_ADDRESS) {
 #else
 		if ((eva >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) {
 #endif
 			if (p->p_vmspace == NULL)
 				return (SIGSEGV);
 
 			map = &p->p_vmspace->vm_map;
 
 #ifdef AIM
 			user_sr = td->td_pcb->pcb_cpu.aim.usr_segm;
 			eva &= ADDR_PIDX | ADDR_POFF;
 			eva |= user_sr << ADDR_SR_SHFT;
 #endif
 		} else {
 			map = kernel_map;
 		}
 	}
 	va = trunc_page(eva);
 
 	if (map != kernel_map) {
 		/*
 		 * Keep swapout from messing with us during this
 		 *	critical time.
 		 */
 		PROC_LOCK(p);
 		++p->p_lock;
 		PROC_UNLOCK(p);
 
 		/* Fault in the user page: */
 		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 
 		PROC_LOCK(p);
 		--p->p_lock;
 		PROC_UNLOCK(p);
 		/*
 		 * XXXDTRACE: add dtrace_doubletrap_func here?
 		 */
 	} else {
 		/*
 		 * Don't have to worry about process locking or stacks in the
 		 * kernel.
 		 */
 		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 	}
 
 	if (rv == KERN_SUCCESS)
 		return (0);
 
 	if (!user && handle_onfault(frame))
 		return (0);
 
 	return (SIGSEGV);
 }
 
 /*
  * For now, this only deals with the particular unaligned access case
  * that gcc tends to generate.  Eventually it should handle all of the
  * possibilities that can happen on a 32-bit PowerPC in big-endian mode.
  */
 
 static int
 fix_unaligned(struct thread *td, struct trapframe *frame)
 {
 	struct thread	*fputhread;
 	int		indicator, reg;
 	double		*fpr;
 
 	indicator = EXC_ALI_OPCODE_INDICATOR(frame->cpu.aim.dsisr);
 
 	switch (indicator) {
 	case EXC_ALI_LFD:
 	case EXC_ALI_STFD:
 		reg = EXC_ALI_RST(frame->cpu.aim.dsisr);
 		fpr = &td->td_pcb->pcb_fpu.fpr[reg].fpr;
 		fputhread = PCPU_GET(fputhread);
 
 		/* Juggle the FPU to ensure that we've initialized
 		 * the FPRs, and that their current state is in
 		 * the PCB.
 		 */
 		if (fputhread != td) {
 			if (fputhread)
 				save_fpu(fputhread);
 			enable_fpu(td);
 		}
 		save_fpu(td);
 
 		if (indicator == EXC_ALI_LFD) {
 			if (copyin((void *)frame->dar, fpr,
 			    sizeof(double)) != 0)
 				return (-1);
 			enable_fpu(td);
 		} else {
 			if (copyout(fpr, (void *)frame->dar,
 			    sizeof(double)) != 0)
 				return (-1);
 		}
 		return (0);
 		break;
 	}
 
 	return (-1);
 }
 
 #ifdef KDB
 int db_trap_glue(struct trapframe *);		/* Called from trap_subr.S */
 
 int
 db_trap_glue(struct trapframe *frame)
 {
 	if (!(frame->srr1 & PSL_PR)
 	    && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC
 		|| (frame->exc == EXC_PGM
 		    && (frame->srr1 & 0x20000))
 		|| frame->exc == EXC_BPT
 		|| frame->exc == EXC_DSI)) {
 		int type = frame->exc;
 
 		/* Ignore DTrace traps. */
 		if (*(uint32_t *)frame->srr0 == EXC_DTRACE)
 			return (0);
 		if (type == EXC_PGM && (frame->srr1 & 0x20000)) {
 			type = T_BREAKPOINT;
 		}
 		return (kdb_trap(type, 0, frame));
 	}
 
 	return (0);
 }
 #endif
Index: projects/ifnet/sys/sys/param.h
===================================================================
--- projects/ifnet/sys/sys/param.h	(revision 281172)
+++ projects/ifnet/sys/sys/param.h	(revision 281173)
@@ -1,348 +1,348 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)param.h	8.3 (Berkeley) 4/4/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_PARAM_H_
 #define _SYS_PARAM_H_
 
 #include <sys/_null.h>
 
 #define	BSD	199506		/* System version (year & month). */
 #define BSD4_3	1
 #define BSD4_4	1
 
 /* 
  * __FreeBSD_version numbers are documented in the Porter's Handbook.
  * If you bump the version for any reason, you should update the documentation
  * there.
  * Currently this lives here in the doc/ repository:
  *
  *	head/en_US.ISO8859-1/books/porters-handbook/book.xml
  *
  * scheme is:  <major><two digit minor>Rxx
  *		'R' is in the range 0 to 4 if this is a release branch or
  *		x.0-CURRENT before RELENG_*_0 is created, otherwise 'R' is
  *		in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1100067	/* Master, propagated to newvers */
+#define __FreeBSD_version 1100068	/* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
  * which by definition is always true on FreeBSD. This macro is also defined
  * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD.
  *
  * It is tempting to use this macro in userland code when we want to enable
  * kernel-specific routines, and in fact it's fine to do this in code that
  * is part of FreeBSD itself.  However, be aware that as presence of this
  * macro is still not widespread (e.g. older FreeBSD versions, 3rd party
  * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in
  * external applications without also checking for __FreeBSD__ as an
  * alternative.
  */
 #undef __FreeBSD_kernel__
 #define __FreeBSD_kernel__
 
 #ifdef _KERNEL
 #define	P_OSREL_SIGWAIT		700000
 #define	P_OSREL_SIGSEGV		700004
 #define	P_OSREL_MAP_ANON	800104
 #define	P_OSREL_MAP_FSTRICT	1100036
 
 #define	P_OSREL_MAJOR(x)	((x) / 100000)
 #endif
 
 #ifndef LOCORE
 #include <sys/types.h>
 #endif
 
 /*
  * Machine-independent constants (some used in following include files).
  * Redefined constants are from POSIX 1003.1 limits file.
  *
  * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>)
  */
 #include <sys/syslimits.h>
 
 #define	MAXCOMLEN	19		/* max command name remembered */
 #define	MAXINTERP	PATH_MAX	/* max interpreter file name length */
 #define	MAXLOGNAME	33		/* max login name length (incl. NUL) */
 #define	MAXUPRC		CHILD_MAX	/* max simultaneous processes */
 #define	NCARGS		ARG_MAX		/* max bytes for an exec function */
 #define	NGROUPS		(NGROUPS_MAX+1)	/* max number groups */
 #define	NOFILE		OPEN_MAX	/* max open files per process */
 #define	NOGROUP		65535		/* marker for empty group set member */
 #define MAXHOSTNAMELEN	256		/* max hostname size */
 #define SPECNAMELEN	63		/* max length of devicename */
 
 /* More types and definitions used throughout the kernel. */
 #ifdef _KERNEL
 #include <sys/cdefs.h>
 #include <sys/errno.h>
 #ifndef LOCORE
 #include <sys/time.h>
 #include <sys/priority.h>
 #endif
 
 #ifndef FALSE
 #define	FALSE	0
 #endif
 #ifndef TRUE
 #define	TRUE	1
 #endif
 #endif
 
 #ifndef _KERNEL
 /* Signals. */
 #include <sys/signal.h>
 #endif
 
 /* Machine type dependent parameters. */
 #include <machine/param.h>
 #ifndef _KERNEL
 #include <sys/limits.h>
 #endif
 
 #ifndef DEV_BSHIFT
 #define	DEV_BSHIFT	9		/* log2(DEV_BSIZE) */
 #endif
 #define	DEV_BSIZE	(1<<DEV_BSHIFT)
 
 #ifndef BLKDEV_IOSIZE
 #define BLKDEV_IOSIZE  PAGE_SIZE	/* default block device I/O size */
 #endif
 #ifndef DFLTPHYS
 #define DFLTPHYS	(64 * 1024)	/* default max raw I/O transfer size */
 #endif
 #ifndef MAXPHYS
 #define MAXPHYS		(128 * 1024)	/* max raw I/O transfer size */
 #endif
 #ifndef MAXDUMPPGS
 #define MAXDUMPPGS	(DFLTPHYS/PAGE_SIZE)
 #endif
 
 /*
  * Constants related to network buffer management.
  * MCLBYTES must be no larger than PAGE_SIZE.
  */
 #ifndef	MSIZE
 #define	MSIZE		256		/* size of an mbuf */
 #endif
 
 #ifndef	MCLSHIFT
 #define MCLSHIFT	11		/* convert bytes to mbuf clusters */
 #endif	/* MCLSHIFT */
 
 #define MCLBYTES	(1 << MCLSHIFT)	/* size of an mbuf cluster */
 
 #if PAGE_SIZE < 2048
 #define	MJUMPAGESIZE	MCLBYTES
 #elif PAGE_SIZE <= 8192
 #define	MJUMPAGESIZE	PAGE_SIZE
 #else
 #define	MJUMPAGESIZE	(8 * 1024)
 #endif
 
 #define	MJUM9BYTES	(9 * 1024)	/* jumbo cluster 9k */
 #define	MJUM16BYTES	(16 * 1024)	/* jumbo cluster 16k */
 
 /*
  * Some macros for units conversion
  */
 
 /* clicks to bytes */
 #ifndef ctob
 #define ctob(x)	((x)<<PAGE_SHIFT)
 #endif
 
 /* bytes to clicks */
 #ifndef btoc
 #define btoc(x)	(((vm_offset_t)(x)+PAGE_MASK)>>PAGE_SHIFT)
 #endif
 
 /*
  * btodb() is messy and perhaps slow because `bytes' may be an off_t.  We
  * want to shift an unsigned type to avoid sign extension and we don't
  * want to widen `bytes' unnecessarily.  Assume that the result fits in
  * a daddr_t.
  */
 #ifndef btodb
 #define btodb(bytes)	 		/* calculates (bytes / DEV_BSIZE) */ \
 	(sizeof (bytes) > sizeof(long) \
 	 ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \
 	 : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT))
 #endif
 
 #ifndef dbtob
 #define dbtob(db)			/* calculates (db * DEV_BSIZE) */ \
 	((off_t)(db) << DEV_BSHIFT)
 #endif
 
 #define	PRIMASK	0x0ff
 #define	PCATCH	0x100		/* OR'd with pri for tsleep to check signals */
 #define	PDROP	0x200	/* OR'd with pri to stop re-entry of interlock mutex */
 
 #define	NZERO	0		/* default "nice" */
 
 #define	NBBY	8		/* number of bits in a byte */
 #define	NBPW	sizeof(int)	/* number of bytes per word (integer) */
 
 #define	CMASK	022		/* default file mask: S_IWGRP|S_IWOTH */
 
 #define	NODEV	(dev_t)(-1)	/* non-existent device */
 
 /*
  * File system parameters and macros.
  *
  * MAXBSIZE -	Filesystems are made out of blocks of at most MAXBSIZE bytes
  *		per block.  MAXBSIZE may be made larger without affecting
  *		any existing filesystems as long as it does not exceed MAXPHYS,
  *		and may be made smaller at the risk of not being able to use
  *		filesystems which require a block size exceeding MAXBSIZE.
  *
  * BKVASIZE -	Nominal buffer space per buffer, in bytes.  BKVASIZE is the
  *		minimum KVM memory reservation the kernel is willing to make.
  *		Filesystems can of course request smaller chunks.  Actual 
  *		backing memory uses a chunk size of a page (PAGE_SIZE).
  *
  *		If you make BKVASIZE too small you risk seriously fragmenting
  *		the buffer KVM map which may slow things down a bit.  If you
  *		make it too big the kernel will not be able to optimally use 
  *		the KVM memory reserved for the buffer cache and will wind 
  *		up with too-few buffers.
  *
  *		The default is 16384, roughly 2x the block size used by a
  *		normal UFS filesystem.
  */
 #define MAXBSIZE	65536	/* must be power of 2 */
 #define BKVASIZE	16384	/* must be power of 2 */
 #define BKVAMASK	(BKVASIZE-1)
 
 /*
  * MAXPATHLEN defines the longest permissible path length after expanding
  * symbolic links. It is used to allocate a temporary buffer from the buffer
  * pool in which to do the name expansion, hence should be a power of two,
  * and must be less than or equal to MAXBSIZE.  MAXSYMLINKS defines the
  * maximum number of symbolic links that may be expanded in a path name.
  * It should be set high enough to allow all legitimate uses, but halt
  * infinite loops reasonably quickly.
  */
 #define	MAXPATHLEN	PATH_MAX
 #define MAXSYMLINKS	32
 
 /*
  * Bit map related macros.
  *
  * `a' is treated as an array of unsigned char and `i' is a bit index:
  * byte (i / NBBY), bit (i % NBBY) within that byte.  setbit() and clrbit()
  * modify the byte in place; isset() and isclr() only read it (through a
  * const-qualified pointer).  isset() yields the masked bit value (zero or
  * non-zero, not necessarily 1); isclr() yields 1 or 0.
  * `i' is expanded twice by each macro, so it must not have side effects.
  */
 #define	setbit(a,i)	(((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY))
 #define	clrbit(a,i)	(((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY)))
 #define	isset(a,i)							\
 	(((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY)))
 #define	isclr(a,i)							\
 	((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0)
 
 /*
  * Macros for counting and rounding.
  *
  * These expand their arguments more than once (e.g. `y' in howmany() and
  * roundup(), `x' in nitems() and powerof2()), so arguments must not have
  * side effects.  nitems() divides sizeof the whole object by sizeof its
  * first element, so it is only meaningful when `x' is an actual array.
  * powerof2() also evaluates true for x == 0.
  */
 #ifndef howmany
 #define	howmany(x, y)	(((x)+((y)-1))/(y))
 #endif
 #define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))
 #define	rounddown(x, y)	(((x)/(y))*(y))
 #define	rounddown2(x, y) ((x)&(~((y)-1)))          /* if y is a power of two */
 #define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))  /* to any y */
 #define	roundup2(x, y)	(((x)+((y)-1))&(~((y)-1))) /* if y is a power of two */
 #define powerof2(x)	((((x)-1)&(x))==0)
 
 /* Macros for min/max. */
 #define	MIN(a,b) (((a)<(b))?(a):(b))
 #define	MAX(a,b) (((a)>(b))?(a):(b))
 
 #ifdef _KERNEL
 /*
  * Basic byte order function prototypes for non-inline functions.
  */
 #ifndef LOCORE
 #ifndef _BYTEORDER_PROTOTYPED
 #define	_BYTEORDER_PROTOTYPED
 __BEGIN_DECLS
 __uint32_t	 htonl(__uint32_t);
 __uint16_t	 htons(__uint16_t);
 __uint32_t	 ntohl(__uint32_t);
 __uint16_t	 ntohs(__uint16_t);
 __END_DECLS
 #endif
 #endif
 
 #ifndef lint
 #ifndef _BYTEORDER_FUNC_DEFINED
 #define	_BYTEORDER_FUNC_DEFINED
 #define	htonl(x)	__htonl(x)
 #define	htons(x)	__htons(x)
 #define	ntohl(x)	__ntohl(x)
 #define	ntohs(x)	__ntohs(x)
 #endif /* !_BYTEORDER_FUNC_DEFINED */
 #endif /* lint */
 #endif /* _KERNEL */
 
 /*
  * Scale factor for scaled integers used to count %cpu time and load avgs.
  *
  * The number of CPU `tick's that map to a unique `%age' can be expressed
  * by the formula (1 / (2 ^ (FSHIFT - 11))).  The maximum load average that
  * can be calculated (assuming 32 bits) can be closely approximated using
  * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15).
  *
  * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age',
  * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024.
  */
 #define	FSHIFT	11		/* bits to right of fixed binary point */
 #define FSCALE	(1<<FSHIFT)
 
 /*
  * Conversions between device blocks (1 << DEV_BSHIFT bytes each) and
  * pages (1 << PAGE_SHIFT bytes each).  dbtoc() rounds up to a whole number
  * of pages.  The argument is fully parenthesized in both macros so that
  * expressions using low-precedence operators (e.g. `a | b') convert
  * correctly.
  */
 #define dbtoc(db)			/* calculates devblks to pages */ \
 	(((db) + (ctodb(1) - 1)) >> (PAGE_SHIFT - DEV_BSHIFT))
 
 #define ctodb(db)			/* calculates pages to devblks */ \
 	((db) << (PAGE_SHIFT - DEV_BSHIFT))
 
 /*
  * Old spelling of __containerof().
  */
 #define	member2struct(s, m, x)						\
 	((struct s *)(void *)((char *)(x) - offsetof(struct s, m)))
 
 /*
  * Access a variable length array that has been declared as a fixed
  * length array.
  */
 #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset])
 
 #endif	/* _SYS_PARAM_H_ */
Index: projects/ifnet/sys/vm/uma_core.c
===================================================================
--- projects/ifnet/sys/vm/uma_core.c	(revision 281172)
+++ projects/ifnet/sys/vm/uma_core.c	(revision 281173)
@@ -1,3625 +1,3625 @@
 /*-
  * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
  * Copyright (c) 2004-2006 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * uma_core.c  Implementation of the Universal Memory allocator
  *
  * This allocator is intended to replace the multitude of similar object caches
  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
  * efficient.  A primary design goal is to return unused memory to the rest of
  * the system.  This will make the system as a whole more flexible due to the
  * ability to move memory to subsystems which most need it instead of leaving
  * pools of reserved memory unused.
  *
  * The basic ideas stem from similar slab/zone based allocators whose algorithms
  * are well known.
  *
  */
 
 /*
  * TODO:
  *	- Improve memory usage for large allocations
  *	- Investigate cache size adjustments
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /* I should really use ktr.. */
 /*
 #define UMA_DEBUG 1
 #define UMA_DEBUG_ALLOC 1
 #define UMA_DEBUG_ALLOC_1 1
 */
 
 #include "opt_ddb.h"
 #include "opt_param.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitset.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/sysctl.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
 
 #include <ddb/ddb.h>
 
 #ifdef DEBUG_MEMGUARD
 #include <vm/memguard.h>
 #endif
 
 /*
  * This is the zone and keg from which all zones are spawned.  The idea is that
  * even the zone & keg heads are allocated from the allocator, so we use the
  * bss section to bootstrap us.
  */
 static struct uma_keg masterkeg;
 static struct uma_zone masterzone_k;
 static struct uma_zone masterzone_z;
 static uma_zone_t kegs = &masterzone_k;
 static uma_zone_t zones = &masterzone_z;
 
 /* This is the zone from which all of uma_slab_t's are allocated. */
 static uma_zone_t slabzone;
 static uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */
 
 /*
  * The initial hash tables come out of this zone so they can be allocated
  * prior to malloc coming up.
  */
 static uma_zone_t hashzone;
 
 /* The boot-time adjusted value for cache line alignment. */
 int uma_align_cache = 64 - 1;
 
 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
 
 /*
  * Are we allowed to allocate buckets?
  */
 static int bucketdisable = 1;
 
 /* Linked list of all kegs in the system */
 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
 
 /* Linked list of all cache-only zones in the system */
 static LIST_HEAD(,uma_zone) uma_cachezones =
     LIST_HEAD_INITIALIZER(uma_cachezones);
 
 /* This RW lock protects the keg list */
 static struct rwlock_padalign uma_rwlock;
 
 /* Linked list of boot time pages */
 static LIST_HEAD(,uma_slab) uma_boot_pages =
     LIST_HEAD_INITIALIZER(uma_boot_pages);
 
 /* This mutex protects the boot time pages list */
 static struct mtx_padalign uma_boot_pages_mtx;
 
 static struct sx uma_drain_lock;
 
 /* Is the VM done starting up? */
 static int booted = 0;
 #define	UMA_STARTUP	1
 #define	UMA_STARTUP2	2
 
 /*
  * Only mbuf clusters use ref zones.  Just provide enough references
  * to support the one user.  New code should not use the ref facility.
  */
 static const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES;
 
 /*
  * This is the handle used to schedule events that need to happen
  * outside of the allocation fast path.
  */
 static struct callout uma_callout;
 #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
 
 /*
  * This structure is passed as the zone ctor arg so that I don't have to create
  * a special allocation function just for zones.
  */
 struct uma_zctor_args {
 	const char *name;
 	size_t size;
 	uma_ctor ctor;
 	uma_dtor dtor;
 	uma_init uminit;
 	uma_fini fini;
 	uma_import import;
 	uma_release release;
 	void *arg;
 	uma_keg_t keg;
 	int align;
 	uint32_t flags;
 };
 
 struct uma_kctor_args {
 	uma_zone_t zone;
 	size_t size;
 	uma_init uminit;
 	uma_fini fini;
 	int align;
 	uint32_t flags;
 };
 
 /*
  * One row of the bucket_zones[] table: describes a zone from which buckets
  * of one particular capacity are allocated.
  */
 struct uma_bucket_zone {
 	uma_zone_t	ubz_zone;	/* Zone created in bucket_init(). */
 	char		*ubz_name;	/* Name given to uma_zcreate(). */
 	int		ubz_entries;	/* Number of items it can hold. */
 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
 };
 
 /*
  * Compute the actual number of bucket entries to pack them in power
  * of two sizes for more efficient space utilization.
  */
 #define	BUCKET_SIZE(n)						\
     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
 
 #define	BUCKET_MAX	BUCKET_SIZE(256)
 
 /*
  * Table of bucket zones, one row per bucket size.  Each row is
  * { ubz_zone, ubz_name, ubz_entries, ubz_maxsize }.  ubz_zone starts out
  * NULL and is filled in by bucket_init().  The last row has ubz_entries == 0
  * and acts as the terminator for every loop that walks this table.
  */
 struct uma_bucket_zone bucket_zones[] = {
 	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
 	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
 	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
 	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
 	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
 	{ NULL, NULL, 0}
 };
 
 /*
  * Flags and enumerations to be passed to internal functions.
  */
 enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
 
 /* Prototypes.. */
 
 static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
 static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
 static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
 static void page_free(void *, vm_size_t, uint8_t);
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
 static void bucket_cache_drain(uma_zone_t zone);
 static int keg_ctor(void *, int, void *, int);
 static void keg_dtor(void *, int, void *);
 static int zone_ctor(void *, int, void *, int);
 static void zone_dtor(void *, int, void *);
 static int zero_init(void *, int, int);
 static void keg_small_init(uma_keg_t keg);
 static void keg_large_init(uma_keg_t keg);
 static void zone_foreach(void (*zfunc)(uma_zone_t));
 static void zone_timeout(uma_zone_t zone);
 static int hash_alloc(struct uma_hash *);
 static int hash_expand(struct uma_hash *, struct uma_hash *);
 static void hash_free(struct uma_hash *hash);
 static void uma_timeout(void *);
 static void uma_startup3(void);
 static void *zone_alloc_item(uma_zone_t, void *, int);
 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
 static void bucket_enable(void);
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
 static void bucket_zone_drain(void);
 static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
 static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
 static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
 static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
     uma_fini fini, int align, uint32_t flags);
 static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
 static void zone_release(uma_zone_t zone, void **bucket, int cnt);
 static void uma_zero_item(void *item, uma_zone_t zone);
 
 void uma_print_zone(uma_zone_t);
 void uma_print_stats(void);
 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
 
 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
 
 static int zone_warnings = 1;
 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
     "Warn when UMA zones becomes full");
 
 /*
  * Re-evaluate whether buckets may be allocated: bucketdisable is refreshed
  * from vm_page_count_min(), so buckets are allowed only while that call
  * reports zero.
  */
 static void
 bucket_enable(void)
 {
 	int	low;
 
 	low = vm_page_count_min();
 	bucketdisable = low;
 }
 
 /*
  * Initialize bucket_zones, the array of zones of buckets of various sizes.
  *
  * For each zone, calculate the memory required for each bucket, consisting
  * of the header and an array of pointers, and create the backing zone with
  * uma_zcreate().  The table is walked until the terminating row whose
  * ubz_entries is 0.
  */
 static void
 bucket_init(void)
 {
 	struct uma_bucket_zone *ubz;
 	int size;
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
 		/* Header rounded up to pointer size, then the item slots. */
 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
 		size += sizeof(void *) * ubz->ubz_entries;
 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
 	}
 }
 
 /*
  * Map a desired bucket capacity onto a bucket zone.
  *
  * Returns the first row of bucket_zones[] whose ubz_entries is at least
  * `entries'.  If no row is large enough, the last real row (the one just
  * before the terminator) is returned instead.
  */
 static struct uma_bucket_zone *
 bucket_zone_lookup(int entries)
 {
 	struct uma_bucket_zone *cur;
 
 	cur = bucket_zones;
 	while (cur->ubz_entries != 0 && cur->ubz_entries < entries)
 		cur++;
 
 	/* Hit the terminator: step back to the largest bucket zone. */
 	if (cur->ubz_entries == 0)
 		cur--;
 
 	return (cur);
 }
 
 /*
  * Choose a bucket entry count for items of `size' bytes.
  *
  * Items larger than the first row's ubz_maxsize get that row's entry count
  * scaled down in proportion to the size, but never less than 1.  Otherwise
  * the result is the ubz_entries of the last row whose ubz_maxsize is still
  * at least `size'.
  */
 static int
 bucket_select(int size)
 {
 	struct uma_bucket_zone *cur;
 	int scaled;
 
 	cur = bucket_zones;
 	if (size > cur->ubz_maxsize) {
 		scaled = (cur->ubz_maxsize * cur->ubz_entries) / size;
 		return (scaled > 1 ? scaled : 1);
 	}
 
 	/* Advance past every row that can still hold an item this large. */
 	while (cur->ubz_entries != 0 && cur->ubz_maxsize >= size)
 		cur++;
 
 	return ((cur - 1)->ubz_entries);
 }
 
 /*
  * Allocate an empty bucket on behalf of `zone'.
  *
  * Arguments:
  *	zone   The zone that will use the bucket
  *	udata  Recursion cookie; only consulted when `zone' is itself a
  *	       bucket zone, otherwise it is replaced by zone->uz_flags
  *	flags  Allocation flags handed to uma_zalloc_arg()
  *
  * Returns:
  *	A bucket with ub_cnt == 0 and ub_entries set to the capacity of the
  *	bucket zone it came from, or NULL if buckets are disabled, the
  *	recursion limit was hit, or uma_zalloc_arg() returned NULL.
  */
 static uma_bucket_t
 bucket_alloc(uma_zone_t zone, void *udata, int flags)
 {
 	struct uma_bucket_zone *ubz;
 	uma_bucket_t bucket;
 
 	/*
 	 * This is to stop us from allocating per cpu buckets while we're
 	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
 	 * boot pages.  This also prevents us from allocating buckets in
 	 * low memory situations.
 	 */
 	if (bucketdisable)
 		return (NULL);
 	/*
 	 * To limit bucket recursion we store the original zone flags
 	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
 	 * NOVM flag to persist even through deep recursions.  We also
 	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
 	 * a bucket for a bucket zone so we do not allow infinite bucket
 	 * recursion.  This cookie will even persist to frees of unused
 	 * buckets via the allocation path or bucket allocations in the
 	 * free path.
 	 */
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
 		udata = (void *)(uintptr_t)zone->uz_flags;
 	else {
 		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
 			return (NULL);
 		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
 	}
 	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
 		flags |= M_NOVM;
 	ubz = bucket_zone_lookup(zone->uz_count);
 	/*
 	 * If the lookup landed on `zone' itself, use the next larger bucket
 	 * zone instead, provided one exists before the table terminator.
 	 */
 	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
 		ubz++;
 	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
 	if (bucket) {
 #ifdef INVARIANTS
 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
 #endif
 		bucket->ub_cnt = 0;
 		bucket->ub_entries = ubz->ubz_entries;
 	}
 
 	return (bucket);
 }
 
 /*
  * Return an empty bucket to the bucket zone matching its capacity.
  *
  * The bucket must hold no items (asserted).  Unless `zone' is itself a
  * bucket zone, the caller's udata is replaced by a cookie built from
  * zone->uz_flags before being handed to uma_zfree_arg().
  */
 static void
 bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 {
 	struct uma_bucket_zone *home;
 	void *cookie;
 
 	KASSERT(bucket->ub_cnt == 0,
 	    ("bucket_free: Freeing a non free bucket."));
 
 	cookie = udata;
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
 		cookie = (void *)(uintptr_t)zone->uz_flags;
 
 	home = bucket_zone_lookup(bucket->ub_entries);
 	uma_zfree_arg(home->ubz_zone, bucket, cookie);
 }
 
 static void
 bucket_zone_drain(void)
 {
 	struct uma_bucket_zone *ubz;
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
 		zone_drain(ubz->ubz_zone);
 }
 
 /*
  * Print the zone's warning message, if it has one, subject to the
  * zone_warnings switch and to ratecheck() with a { 300, 0 } timeval
  * as the minimum interval per zone.
  */
 static void
 zone_log_warning(uma_zone_t zone)
 {
 	static const struct timeval warninterval = { 300, 0 };
 
 	if (zone_warnings && zone->uz_warning != NULL &&
 	    ratecheck(&zone->uz_ratecheck, &warninterval)) {
 		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
 	}
 }
 
 /*
  * Invoke kegfn on each keg linked into the zone's uz_kegs list, in list
  * order.
  */
 static void
 zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
 {
 	uma_klink_t link;
 
 	for (link = LIST_FIRST(&zone->uz_kegs); link != NULL;
 	    link = LIST_NEXT(link, kl_link)) {
 		kegfn(link->kl_keg);
 	}
 }
 
 /*
  * Routine called by timeout which is used to fire off some time interval
  * based calculations.  (stats, hash size, etc.)
  *
  * It refreshes the bucket-enable state, runs zone_timeout() on the zones
  * via zone_foreach(), and then re-arms uma_callout so that it fires again
  * after UMA_TIMEOUT * hz ticks.
  *
  * Arguments:
  *	unused   Callout argument; not used
  *
  * Returns:
  *	Nothing
  */
 static void
 uma_timeout(void *unused)
 {
 	bucket_enable();
 	zone_foreach(zone_timeout);
 
 	/* Reschedule this event */
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 }
 
 /*
  * Routine to perform timeout driven calculations for one keg.  As written
  * here, its only job is to grow the keg's slab hash table when the keg
  * uses UMA_ZONE_HASH and has at least as many slabs as hash entries.
  *
  * The keg lock is taken on entry and is always released before returning.
  *
  *  Returns nothing.
  */
 static void
 keg_timeout(uma_keg_t keg)
 {
 
 	KEG_LOCK(keg);
 	/*
 	 * Expand the keg hash table.
 	 *
 	 * This is done if the number of slabs is larger than the hash size.
 	 * What I'm trying to do here is completely reduce collisions.  This
 	 * may be a little aggressive.  Should I allow for two collisions max?
 	 */
 	if (keg->uk_flags & UMA_ZONE_HASH &&
 	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
 		struct uma_hash newhash;
 		struct uma_hash oldhash;
 		int ret;
 
 		/*
 		 * This is so involved because allocating and freeing
 		 * while the keg lock is held will lead to deadlock.
 		 * I have to do everything in stages and check for
 		 * races.
 		 */
 		/* Snapshot the current hash so hash_alloc() sees its size. */
 		newhash = keg->uk_hash;
 		KEG_UNLOCK(keg);
 		ret = hash_alloc(&newhash);
 		KEG_LOCK(keg);
 		if (ret) {
 			/*
 			 * If hash_expand() moved the slabs over, install the
 			 * new table and retire the old one; if it declined,
 			 * discard the table that was just allocated.
 			 */
 			if (hash_expand(&keg->uk_hash, &newhash)) {
 				oldhash = keg->uk_hash;
 				keg->uk_hash = newhash;
 			} else
 				oldhash = newhash;
 
 			/* Free only after dropping the keg lock. */
 			KEG_UNLOCK(keg);
 			hash_free(&oldhash);
 			return;
 		}
 	}
 	KEG_UNLOCK(keg);
 }
 
 /*
  * Per-zone timeout work: run keg_timeout() on every keg of the zone.
  */
 static void
 zone_timeout(uma_zone_t zone)
 {
 	zone_foreach_keg(zone, keg_timeout);
 }
 
 /*
  * Allocate and zero fill the next sized hash table from the appropriate
  * backing store.
  *
  * A hash with uh_hashsize == 0 gets an initial table of UMA_HASH_SIZE_INIT
  * entries from hashzone (M_WAITOK); otherwise the size is doubled and the
  * table comes from malloc(9) with M_NOWAIT, which may fail.
  *
  * Arguments:
  *	hash  A new hash structure with the old hash size in uh_hashsize
  *
  * Returns:
  *	1 on success and 0 on failure.
  */
 static int
 hash_alloc(struct uma_hash *hash)
 {
 	int bytes;
 	int prev;
 
 	prev = hash->uh_hashsize;
 
 	if (prev == 0) {
 		/* First table: fixed initial size, taken from hashzone. */
 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
 		bytes = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
 		    M_WAITOK);
 	} else {
 		/* We're just going to go to a power of two greater */
 		hash->uh_hashsize = prev * 2;
 		bytes = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
 		hash->uh_slab_hash = (struct slabhead *)malloc(bytes,
 		    M_UMAHASH, M_NOWAIT);
 	}
 
 	if (hash->uh_slab_hash == NULL)
 		return (0);
 
 	bzero(hash->uh_slab_hash, bytes);
 	hash->uh_hashmask = hash->uh_hashsize - 1;
 	return (1);
 }
 
 /*
  * Expands the hash table for HASH zones.  This is done from zone_timeout
  * to reduce collisions.  This must not be done in the regular allocation
  * path, otherwise, we can recurse on the vm while allocating pages.
  *
  * Arguments:
  *	oldhash  The hash you want to expand
  *	newhash  The hash structure for the new table
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  */
 static int
 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
 {
 	uma_slab_t slab;
 	int hval;
 	int i;
 
 	if (!newhash->uh_slab_hash)
 		return (0);
 
 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
 		return (0);
 
 	/*
 	 * I need to investigate hash algorithms for resizing without a
 	 * full rehash.
 	 */
 
 	for (i = 0; i < oldhash->uh_hashsize; i++)
 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
 			hval = UMA_HASH(newhash, slab->us_data);
 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
 			    slab, us_hlink);
 		}
 
 	return (1);
 }
 
 /*
  * Free a hash table to the appropriate backing store.
  *
  * A table of exactly UMA_HASH_SIZE_INIT entries goes back to hashzone;
  * any other size is released with free(9).  A hash without a table is
  * left alone.
  *
  * Arguments:
  *	hash  The hash whose uh_slab_hash table is being freed
  *
  * Returns:
  *	Nothing
  */
 static void
 hash_free(struct uma_hash *hash)
 {
 	void *table;
 
 	table = hash->uh_slab_hash;
 	if (table != NULL) {
 		if (hash->uh_hashsize != UMA_HASH_SIZE_INIT)
 			free(table, M_UMAHASH);
 		else
 			zone_free_item(hashzone, table, NULL, SKIP_NONE);
 	}
 }
 
 /*
  * Frees all outstanding items in a bucket
  *
  * Each item is first passed to the zone's fini routine, if one is set,
  * and then the whole batch is handed to the zone's release routine.  The
  * bucket itself is not freed, only emptied.  A NULL bucket is ignored.
  *
  * Arguments:
  *	zone   The zone to free to, must be unlocked.
  *	bucket The free/alloc bucket with items, cpu queue must be locked.
  *
  * Returns:
  *	Nothing
  */
 
 static void
 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 {
 	int idx;
 
 	if (bucket == NULL)
 		return;
 
 	if (zone->uz_fini != NULL) {
 		idx = 0;
 		while (idx < bucket->ub_cnt) {
 			zone->uz_fini(bucket->ub_bucket[idx], zone->uz_size);
 			idx++;
 		}
 	}
 
 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
 	bucket->ub_cnt = 0;
 }
 
 /*
  * Drains the per cpu caches for a zone.
  *
  * NOTE: This may only be called while the zone is being torn down, and not
  * during normal operation.  This is necessary in order that we do not have
  * to migrate CPUs to drain the per-CPU caches.
  *
  * Arguments:
  *	zone     The zone to drain, must be unlocked.
  *
  * Returns:
  *	Nothing
  */
 static void
 cache_drain(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	int cpu;
 
 	/*
 	 * XXX: It is safe to not lock the per-CPU caches, because we're
 	 * tearing down the zone anyway.  I.e., there will be no further use
 	 * of the caches at this point.
 	 *
 	 * XXX: It would be good to be able to assert that the zone is being
 	 * torn down to prevent improper use of cache_drain().
 	 *
 	 * XXX: We lock the zone before passing into bucket_cache_drain() as
 	 * it is used elsewhere.  Should the tear-down path be made special
 	 * there in some form?
 	 */
 	CPU_FOREACH(cpu) {
 		cache = &zone->uz_cpu[cpu];
 		/* Empty both buckets first (bucket_drain() ignores NULL)... */
 		bucket_drain(zone, cache->uc_allocbucket);
 		bucket_drain(zone, cache->uc_freebucket);
 		/* ...then free the now-empty buckets themselves. */
 		if (cache->uc_allocbucket != NULL)
 			bucket_free(zone, cache->uc_allocbucket, NULL);
 		if (cache->uc_freebucket != NULL)
 			bucket_free(zone, cache->uc_freebucket, NULL);
 		cache->uc_allocbucket = cache->uc_freebucket = NULL;
 	}
 	ZONE_LOCK(zone);
 	bucket_cache_drain(zone);
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Reduce a zone's bucket size to the midpoint between its current value
  * and its minimum.  Internal zones are left untouched.
  */
 static void
 cache_shrink(uma_zone_t zone)
 {
 
 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) != 0)
 		return;
 
 	ZONE_LOCK(zone);
 	zone->uz_count = (zone->uz_count + zone->uz_count_min) / 2;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Detach the current CPU's alloc and free buckets from a zone's per-CPU
  * cache.  Buckets that still hold items are put on the zone's bucket
  * list; empty ones are freed once the locks have been dropped.  Internal
  * zones are skipped.
  */
 static void
 cache_drain_safe_cpu(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	uma_bucket_t bucket, empty_alloc, empty_free;
 
 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) != 0)
 		return;
 
 	empty_free = NULL;
 	empty_alloc = NULL;
 
 	ZONE_LOCK(zone);
 	critical_enter();
 	cache = &zone->uz_cpu[curcpu];
 
 	bucket = cache->uc_allocbucket;
 	if (bucket != NULL) {
 		if (bucket->ub_cnt == 0)
 			empty_alloc = bucket;
 		else
 			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
 		cache->uc_allocbucket = NULL;
 	}
 
 	bucket = cache->uc_freebucket;
 	if (bucket != NULL) {
 		if (bucket->ub_cnt == 0)
 			empty_free = bucket;
 		else
 			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
 		cache->uc_freebucket = NULL;
 	}
 
 	critical_exit();
 	ZONE_UNLOCK(zone);
 
 	/* Empty buckets are freed only after the zone lock is released. */
 	if (empty_alloc != NULL)
 		bucket_free(zone, empty_alloc, NULL);
 	if (empty_free != NULL)
 		bucket_free(zone, empty_free, NULL);
 }
 
 /*
  * Safely detach the per-CPU cache buckets of one zone, or of every zone
  * when zone is NULL.
  * This is an expensive call because it needs to bind to all CPUs
  * one by one and enter a critical section on each of them in order
  * to safely access their cache buckets.
  * The zone lock must not be held when calling this function.
  */
 static void
 cache_drain_safe(uma_zone_t zone)
 {
 	int cpu;
 
 	/*
 	 * Polite bucket size shrinking was not enough, shrink aggressively.
 	 */
 	if (zone != NULL)
 		cache_shrink(zone);
 	else
 		zone_foreach(cache_shrink);
 
 	CPU_FOREACH(cpu) {
 		/* Move this thread onto the CPU whose cache is drained next. */
 		thread_lock(curthread);
 		sched_bind(curthread, cpu);
 		thread_unlock(curthread);
 
 		if (zone == NULL)
 			zone_foreach(cache_drain_safe_cpu);
 		else
 			cache_drain_safe_cpu(zone);
 	}
 
 	/* Release the CPU binding taken in the loop above. */
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 }
 
 /*
  * Drain the cached buckets from a zone.  Expects a locked zone on entry;
  * the lock is dropped around each bucket's drain/free and is held again
  * on return.
  */
 static void
 bucket_cache_drain(uma_zone_t zone)
 {
 	uma_bucket_t b;
 
 	/*
 	 * Drain the bucket queues and free the buckets, we just keep two per
 	 * cpu (alloc/free).
 	 */
 	for (;;) {
 		b = LIST_FIRST(&zone->uz_buckets);
 		if (b == NULL)
 			break;
 		LIST_REMOVE(b, ub_link);
 		ZONE_UNLOCK(zone);
 		bucket_drain(zone, b);
 		bucket_free(zone, b, NULL);
 		ZONE_LOCK(zone);
 	}
 
 	/*
 	 * Shrink bucket sizes further.  The price of a single zone lock
 	 * collision is probably lower than the price of a global cache drain.
 	 */
 	if (zone->uz_count > zone->uz_count_min)
 		zone->uz_count--;
 }
 
 /*
  * Give a slab's memory back through the keg's free function.
  *
  * The keg's fini routine, if set, is run on items start - 1 down to 0
  * first.  For off-page kegs the slab header is returned to the keg's slab
  * zone before the backing memory is freed.
  */
 static void
 keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
 {
 	uint8_t *base;
 	uint8_t slab_flags;
 	int idx;
 
 	/* Read these now; the header may be released below. */
 	base = slab->us_data;
 	slab_flags = slab->us_flags;
 
 	if (keg->uk_fini != NULL) {
 		for (idx = start - 1; idx >= 0; idx--)
 			keg->uk_fini(slab->us_data + (keg->uk_rsize * idx),
 			    keg->uk_size);
 	}
 
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) != 0)
 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 #ifdef UMA_DEBUG
 	printf("%s: Returning %d bytes.\n", keg->uk_name,
 	    PAGE_SIZE * keg->uk_ppera);
 #endif
 	keg->uk_freef(base, PAGE_SIZE * keg->uk_ppera, slab_flags);
 }
 
 /*
  * Frees pages from a keg back to the system.  This is done on demand from
  * the pageout daemon.
  *
  * Completely free slabs are unlinked and un-accounted under the keg lock
  * and collected on a local list; their memory is released only after the
  * lock has been dropped.
  *
  * Returns nothing.
  */
 static void
 keg_drain(uma_keg_t keg)
 {
 	struct slabhead freeslabs = { 0 };
 	uma_slab_t slab;
 	uma_slab_t next;
 
 	/*
 	 * We don't want to take pages from statically allocated kegs at this
 	 * time
 	 */
 	if ((keg->uk_flags & UMA_ZONE_NOFREE) != 0 || keg->uk_freef == NULL)
 		return;
 
 #ifdef UMA_DEBUG
 	printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
 #endif
 	KEG_LOCK(keg);
 	if (keg->uk_free != 0) {
 		for (slab = LIST_FIRST(&keg->uk_free_slab); slab != NULL;
 		    slab = next) {
 			/* Fetch the successor before slab may be unlinked. */
 			next = LIST_NEXT(slab, us_link);
 
 			/* We have nowhere to free these to */
 			if ((slab->us_flags & UMA_SLAB_BOOT) != 0)
 				continue;
 
 			LIST_REMOVE(slab, us_link);
 			keg->uk_pages -= keg->uk_ppera;
 			keg->uk_free -= keg->uk_ipers;
 
 			if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
 				UMA_HASH_REMOVE(&keg->uk_hash, slab,
 				    slab->us_data);
 
 			SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
 		}
 	}
 	KEG_UNLOCK(keg);
 
 	/* Release the collected slabs now that the keg lock is dropped. */
 	for (;;) {
 		slab = SLIST_FIRST(&freeslabs);
 		if (slab == NULL)
 			break;
 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
 		keg_free_slab(keg, slab, keg->uk_ipers);
 	}
 }
 
 /*
  * Drain a zone's cached buckets and then each of its kegs.
  *
  * Arguments:
  *	zone    The zone to drain; its lock is taken and released here
  *	waitok  M_NOWAIT to give up when the zone is already marked as
  *	        draining; any other value sleeps until the mark clears
  *
  * Returns:
  *	Nothing
  */
 static void
 zone_drain_wait(uma_zone_t zone, int waitok)
 {
 
 	/*
 	 * Set draining to interlock with zone_dtor() so we can release our
 	 * locks as we go.  Only dtor() should do a WAITOK call since it
 	 * is the only call that knows the structure will still be available
 	 * when it wakes up.
 	 */
 	ZONE_LOCK(zone);
 	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
 		/* Another drain is in progress; non-waiting callers back off. */
 		if (waitok == M_NOWAIT)
 			goto out;
 		/* Sleep on the zone and re-check the flag after waking. */
 		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
 	}
 	zone->uz_flags |= UMA_ZFLAG_DRAINING;
 	bucket_cache_drain(zone);
 	ZONE_UNLOCK(zone);
 	/*
 	 * The DRAINING flag protects us from being freed while
 	 * we're running.  Normally the uma_rwlock would protect us but we
 	 * must be able to release and acquire the right lock for each keg.
 	 */
 	zone_foreach_keg(zone, &keg_drain);
 	ZONE_LOCK(zone);
 	/* Drop the mark and wake anyone sleeping in the loop above. */
 	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
 	wakeup(zone);
 out:
 	/* Both the normal path and the early exit arrive here locked. */
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Drain a zone without waiting for a drain that is already in progress:
  * this is zone_drain_wait() called with M_NOWAIT.
  */
 void
 zone_drain(uma_zone_t zone)
 {
 
 	zone_drain_wait(zone, M_NOWAIT);
 }
 
 /*
  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
  *
  * The keg lock must be owned on entry (asserted below).  It is dropped
  * while the slab is allocated and initialized and is re-acquired before
  * returning, on success and on failure alike.
  *
  * Arguments:
  *	keg   The keg the slab is allocated for
  *	zone  Passed through to the keg's allocation function
  *	wait  Shall we wait?  M_ZERO and M_NODUMP are adjusted here from
  *	      the keg flags before the allocation function is called.
  *
  * Returns:
  *	The slab that was allocated, or NULL if the off-page slab header or
  *	the backing memory could not be allocated, or if the keg's init
  *	routine failed on one of the items.
  */
 static uma_slab_t
 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
 {
 	uma_slabrefcnt_t slabref;
 	uma_alloc allocf;
 	uma_slab_t slab;
 	uint8_t *mem;
 	uint8_t flags;
 	int i;
 
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	mem = NULL;
 
 #ifdef UMA_DEBUG
 	printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
 #endif
 	/* Read the allocator while locked, then drop the keg lock. */
 	allocf = keg->uk_allocf;
 	KEG_UNLOCK(keg);
 
 	/* Off-page kegs take their slab header from the keg's slab zone. */
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
 		if (slab == NULL)
 			goto out;
 	}
 
 	/*
 	 * This reproduces the old vm_zone behavior of zero filling pages the
 	 * first time they are added to a zone.
 	 *
 	 * Malloced items are zeroed in uma_zalloc.
 	 */
 
 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 		wait |= M_ZERO;
 	else
 		wait &= ~M_ZERO;
 
 	if (keg->uk_flags & UMA_ZONE_NODUMP)
 		wait |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
 	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
 	if (mem == NULL) {
 		/* Give back the off-page header allocated above. */
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 		slab = NULL;
 		goto out;
 	}
 
 	/* Point the slab into the allocated memory */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
 
 	/* Register the slab for every page of the allocation. */
 	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
 		for (i = 0; i < keg->uk_ppera; i++)
 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
 
 	/* Fill in the header; every item starts out free. */
 	slab->us_keg = keg;
 	slab->us_data = mem;
 	slab->us_freecount = keg->uk_ipers;
 	slab->us_flags = flags;
 	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
 #ifdef INVARIANTS
 	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
 #endif
 	/* Reference-counted kegs start every per-item count at zero. */
 	if (keg->uk_flags & UMA_ZONE_REFCNT) {
 		slabref = (uma_slabrefcnt_t)slab;
 		for (i = 0; i < keg->uk_ipers; i++)
 			slabref->us_refcnt[i] = 0;
 	}
 
 	/*
 	 * Run the keg's init routine on each item.  If it fails part way,
 	 * i is the number of items already initialized; keg_free_slab()
 	 * receives it as the starting point for teardown.
 	 */
 	if (keg->uk_init != NULL) {
 		for (i = 0; i < keg->uk_ipers; i++)
 			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
 			    keg->uk_size, wait) != 0)
 				break;
 		if (i != keg->uk_ipers) {
 			keg_free_slab(keg, slab, i);
 			slab = NULL;
 			goto out;
 		}
 	}
 out:
 	/* Re-acquire the keg lock on every path before returning. */
 	KEG_LOCK(keg);
 
 	/* On success, hash the slab if required and account for it. */
 	if (slab != NULL) {
 		if (keg->uk_flags & UMA_ZONE_HASH)
 			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
 
 		keg->uk_pages += keg->uk_ppera;
 		keg->uk_free += keg->uk_ipers;
 	}
 
 	return (slab);
 }
 
 /*
  * This function is intended to be used early on in place of page_alloc() so
  * that we may use the boot time page cache to satisfy allocations before
  * the VM is ready.
  *
  * Arguments:
  *	zone   The zone being allocated for; its first keg is the one whose
  *	       allocator is switched once the boot cache is exhausted
  *	bytes  The number of bytes requested
  *	pflag  Set to the flags of the slab that is handed out
  *	wait   Passed on to the real allocator when we fall through to it
  *
  * Returns:
  *	A pointer to the memory, either from the boot page list or from the
  *	keg's newly installed allocator.  Panics if the boot pages run out
  *	before UMA_STARTUP2.
  */
 static void *
 startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
 {
 	uma_keg_t keg;
 	uma_slab_t tmps;
 	int pages, check_pages;
 
 	keg = zone_first_keg(zone);
 	pages = howmany(bytes, PAGE_SIZE);
 	/* Links to follow from the list head to reach entry number "pages". */
 	check_pages = pages - 1;
 	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
 
 	/*
 	 * Check our small startup cache to see if it has pages remaining.
 	 */
 	mtx_lock(&uma_boot_pages_mtx);
 
 	/* First check if we have enough room. */
 	tmps = LIST_FIRST(&uma_boot_pages);
 	while (tmps != NULL && check_pages-- > 0)
 		tmps = LIST_NEXT(tmps, us_link);
 	/* tmps is non-NULL only if the list holds at least "pages" entries. */
 	if (tmps != NULL) {
 		/*
 		 * It's ok to lose tmps references.  The last one will
 		 * have tmps->us_data pointing to the start address of
 		 * "pages" contiguous pages of memory.
 		 */
 		while (pages-- > 0) {
 			tmps = LIST_FIRST(&uma_boot_pages);
 			LIST_REMOVE(tmps, us_link);
 		}
 		mtx_unlock(&uma_boot_pages_mtx);
 		*pflag = tmps->us_flags;
 		return (tmps->us_data);
 	}
 	mtx_unlock(&uma_boot_pages_mtx);
 	/* The boot cache is too small and there is no fallback yet. */
 	if (booted < UMA_STARTUP2)
 		panic("UMA: Increase vm.boot_pages");
 	/*
 	 * Now that we've booted reset these users to their real allocator.
 	 */
 #ifdef UMA_MD_SMALL_ALLOC
 	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
 #else
 	keg->uk_allocf = page_alloc;
 #endif
 	/* Satisfy this request through the allocator just installed. */
 	return keg->uk_allocf(zone, bytes, pflag, wait);
 }
 
 /*
  * Allocates a number of pages from the system
  *
  * The memory comes from kmem_arena and the slab is flagged UMA_SLAB_KMEM.
  *
  * Arguments:
  *	zone   Unused
  *	bytes  The number of bytes requested
  *	pflag  Receives UMA_SLAB_KMEM
  *	wait   Shall we wait?
  *
  * Returns:
  *	A pointer to the alloced memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
 {
 
 	*pflag = UMA_SLAB_KMEM;
 	return ((void *)kmem_malloc(kmem_arena, bytes, wait));
 }
 
 /*
  * Allocates a number of pages from within an object
  *
  * Pages are allocated one at a time without an object, then mapped at
  * consecutive addresses starting at an offset into the keg's KVA range.
  *
  * Arguments:
  *	zone   The zone being allocated for; its first keg supplies the
  *	       KVA base (uk_kva) and the running offset (uk_offset)
  *	bytes  The number of bytes requested
  *	flags  Receives UMA_SLAB_PRIV on success
  *	wait   Shall we wait?
  *
  * Returns:
  *	A pointer to the alloced memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
 {
 	TAILQ_HEAD(, vm_page) alloctail;
 	u_long npages;
 	vm_offset_t retkva, zkva;
 	vm_page_t p, p_next;
 	uma_keg_t keg;
 
 	TAILQ_INIT(&alloctail);
 	keg = zone_first_keg(zone);
 
 	/* Collect the pages on a local list until the request is covered. */
 	npages = howmany(bytes, PAGE_SIZE);
 	while (npages > 0) {
 		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
 		if (p != NULL) {
 			/*
 			 * Since the page does not belong to an object, its
 			 * listq is unused.
 			 */
 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
 			npages--;
 			continue;
 		}
 		/* No page available: waiting callers retry after VM_WAIT. */
 		if (wait & M_WAITOK) {
 			VM_WAIT;
 			continue;
 		}
 
 		/*
 		 * Page allocation failed, free intermediate pages and
 		 * exit.
 		 */
 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
 			vm_page_unwire(p, PQ_INACTIVE);
 			vm_page_free(p); 
 		}
 		return (NULL);
 	}
 	*flags = UMA_SLAB_PRIV;
 	/* Atomically claim a page-rounded range after the keg's KVA base. */
 	zkva = keg->uk_kva +
 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
 	retkva = zkva;
 	/* Map the collected pages one after another into that range. */
 	TAILQ_FOREACH(p, &alloctail, listq) {
 		pmap_qenter(zkva, &p, 1);
 		zkva += PAGE_SIZE;
 	}
 
 	return ((void *)retkva);
 }
 
 /*
  * Frees a number of pages to the system
  *
  * The arena is chosen from the slab flags: kmem_arena for UMA_SLAB_KMEM,
  * otherwise kernel_arena for UMA_SLAB_KERNEL.  Flags carrying neither bit
  * cause a panic.
  *
  * Arguments:
  *	mem   A pointer to the memory to be freed
  *	size  The size of the memory being freed
  *	flags The original p->us_flags field
  *
  * Returns:
  *	Nothing
  */
 static void
 page_free(void *mem, vm_size_t size, uint8_t flags)
 {
 	struct vmem *arena;
 
 	if ((flags & (UMA_SLAB_KMEM | UMA_SLAB_KERNEL)) == 0)
 		panic("UMA: page_free used with invalid flags %d", flags);
 
 	/* UMA_SLAB_KMEM takes precedence when both bits are present. */
 	arena = (flags & UMA_SLAB_KMEM) != 0 ? kmem_arena : kernel_arena;
 	kmem_free(arena, (vm_offset_t)mem, size);
 }
 
 /*
  * Zero fill initializer
  *
  * Clears the first size bytes of mem and reports success.  The flags
  * argument is not used.
  *
  * Arguments/Returns follow uma_init specifications
  */
 static int
 zero_init(void *mem, int size, int flags)
 {
 
 	(void)memset(mem, 0, size);
 	return (0);
 }
 
 /*
  * Finish creating a small uma keg.  This calculates ipers, and the keg size.
  *
  * Sets uk_slabsize, uk_ppera, uk_rsize and uk_ipers, and may turn on
  * UMA_ZONE_OFFPAGE and UMA_ZONE_HASH in uk_flags.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_small_init(uma_keg_t keg)
 {
 	u_int rsize;
 	u_int memused;
 	u_int wastedspace;
 	u_int shsize;
 
 	/*
 	 * Per-CPU kegs use sizeof(struct pcpu) as the slab size and enough
 	 * pages for one such slab per CPU; everything else uses one page
 	 * and UMA_SLAB_SIZE.
 	 */
 	if (keg->uk_flags & UMA_ZONE_PCPU) {
 		u_int ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
 
 		keg->uk_slabsize = sizeof(struct pcpu);
 		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
 		    PAGE_SIZE);
 	} else {
 		keg->uk_slabsize = UMA_SLAB_SIZE;
 		keg->uk_ppera = 1;
 	}
 
 	/*
 	 * Calculate the size of each allocation (rsize) according to
 	 * alignment.  If the requested size is smaller than we have
 	 * allocation bits for we round it up.
 	 */
 	rsize = keg->uk_size;
 	if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
 		rsize = keg->uk_slabsize / SLAB_SETSIZE;
 	/* uk_align is a mask; round rsize up to the next multiple. */
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
 	keg->uk_rsize = rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
 	    keg->uk_rsize < sizeof(struct pcpu),
 	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
 	/*
 	 * From here on the local rsize also counts a uint32_t per item for
 	 * reference-counted kegs; keg->uk_rsize stays without it.
 	 */
 	if (keg->uk_flags & UMA_ZONE_REFCNT)
 		rsize += sizeof(uint32_t);
 
 	/* An in-page slab header takes space away from the items. */
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		shsize = 0;
 	else 
 		shsize = sizeof(struct uma_slab);
 
 	keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 
 	memused = keg->uk_ipers * rsize + shsize;
 	wastedspace = keg->uk_slabsize - memused;
 
 	/*
 	 * We can't do OFFPAGE if we're internal or if we've been
 	 * asked to not go to the VM for buckets.  If we do this we
 	 * may end up going to the VM  for slabs which we do not
 	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
 	 * of UMA_ZONE_VM, which clearly forbids it.
 	 */
 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
 		return;
 
 	/*
 	 * See if using an OFFPAGE slab will limit our waste.  Only do
 	 * this if it permits more items per-slab.
 	 *
 	 * XXX We could try growing slabsize to limit max waste as well.
 	 * Historically this was not done because the VM could not
 	 * efficiently handle contiguous allocations.
 	 */
 	if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
 	    (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
 		keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
 		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 #ifdef UMA_DEBUG
 		printf("UMA decided we need offpage slab headers for "
 		    "keg: %s, calculated wastedspace = %d, "
 		    "maximum wasted space allowed = %d, "
 		    "calculated ipers = %d, "
 		    "new wasted space = %d\n", keg->uk_name, wastedspace,
 		    keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
 		    keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
 #endif
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
 	}
 
 	/* Off-page slabs without VTOSLAB are found through the keg hash. */
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 		keg->uk_flags |= UMA_ZONE_HASH;
 }
 
 /*
  * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Each slab holds a
  * single item and spans as many pages as the item needs.  The slab header
  * goes OFFPAGE unless the pages have enough room left behind the item.
  * When I can allow for more dynamic slab sizes this will be more
  * complicated.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_large_init(uma_keg_t keg)
 {
 	u_int hdrsize;
 
 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
 	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 
 	/* One item per slab, sized in whole pages. */
 	keg->uk_rsize = keg->uk_size;
 	keg->uk_ipers = 1;
 	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
 	keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
 
 	/* We can't do OFFPAGE if we're internal, bail out here. */
 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) != 0)
 		return;
 
 	/* Check whether we have enough space to not do OFFPAGE. */
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
 		hdrsize = sizeof(struct uma_slab);
 		if ((keg->uk_flags & UMA_ZONE_REFCNT) != 0)
 			hdrsize += keg->uk_ipers * sizeof(uint32_t);
 		/* Round the header size up to pointer alignment. */
 		if ((hdrsize & UMA_ALIGN_PTR) != 0)
 			hdrsize = (hdrsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
 
 		if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < hdrsize)
 			keg->uk_flags |= UMA_ZONE_OFFPAGE;
 	}
 
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) != 0 &&
 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 		keg->uk_flags |= UMA_ZONE_HASH;
 }
 
 /*
  * Finish creating a UMA_ZONE_CACHESPREAD keg.  Computes the per-item size,
  * the pages per slab (capped at 128KB worth) and the items per slab, and
  * forces UMA_ZONE_OFFPAGE and UMA_ZONE_VTOSLAB on.
  */
 static void
 keg_cachespread_init(uma_keg_t keg)
 {
 	int stride;
 	int slack;
 	int npages;
 	int itemsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
 
 	stride = keg->uk_align + 1;
 	itemsize = keg->uk_size;
 
 	/*
 	 * We want one item to start on every align boundary in a page.  To
 	 * do this we will span pages.  We will also extend the item by the
 	 * size of align if it is an even multiple of align.  Otherwise, it
 	 * would fall on the same boundary every time.
 	 */
 	if (itemsize & keg->uk_align)
 		itemsize = (itemsize & ~keg->uk_align) + stride;
 	if ((itemsize & stride) == 0)
 		itemsize += stride;
 
 	/* Bytes added to the requested size by the adjustments above. */
 	slack = itemsize - keg->uk_size;
 
 	npages = (itemsize * (PAGE_SIZE / stride)) / PAGE_SIZE;
 	npages = MIN(npages, (128 * 1024) / PAGE_SIZE);
 
 	keg->uk_rsize = itemsize;
 	keg->uk_ppera = npages;
 	keg->uk_slabsize = UMA_SLAB_SIZE;
 	keg->uk_ipers = ((npages * PAGE_SIZE) + slack) / itemsize;
 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
 	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
 	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
 	    keg->uk_ipers));
 }
 
 /*
  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
  * the keg onto the global keg list.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	mem    The keg being constructed
  *	size   Number of bytes at mem that are zeroed before initialization
  *	udata  Actually uma_kctor_args
  *	flags  Not used by this ctor
  *
  * Always returns 0; inconsistent sizing ends in a panic instead.
  */
 static int
 keg_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_kctor_args *arg = udata;
 	uma_keg_t keg = mem;
 	uma_zone_t zone;
 
 	/* Start from a clean keg and copy over the caller's parameters. */
 	bzero(keg, size);
 	keg->uk_size = arg->size;
 	keg->uk_init = arg->uminit;
 	keg->uk_fini = arg->fini;
 	keg->uk_align = arg->align;
 	keg->uk_free = 0;
 	keg->uk_reserve = 0;
 	keg->uk_pages = 0;
 	keg->uk_flags = arg->flags;
 	keg->uk_allocf = page_alloc;
 	keg->uk_freef = page_free;
 	keg->uk_slabzone = NULL;
 
 	/*
 	 * The master zone is passed to us at keg-creation time.
 	 */
 	zone = arg->zone;
 	keg->uk_name = zone->uz_name;
 
 	/* Translate creation flags into keg flags and behavior. */
 	if (arg->flags & UMA_ZONE_VM)
 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
 
 	/* UMA_ZONE_ZINIT replaces any caller-supplied init routine. */
 	if (arg->flags & UMA_ZONE_ZINIT)
 		keg->uk_init = zero_init;
 
 	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
 
 	/* Per-CPU kegs go off-page on SMP; without SMP the flag is dropped. */
 	if (arg->flags & UMA_ZONE_PCPU)
 #ifdef SMP
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
 #else
 		keg->uk_flags &= ~UMA_ZONE_PCPU;
 #endif
 
 	/*
 	 * Pick the sizing routine: cachespread kegs have their own, and the
 	 * others are "large" when an item cannot share a UMA_SLAB_SIZE slab
 	 * with its header.
 	 */
 	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
 		keg_cachespread_init(keg);
 	} else if (keg->uk_flags & UMA_ZONE_REFCNT) {
 		if (keg->uk_size >
 		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
 		    sizeof(uint32_t)))
 			keg_large_init(keg);
 		else
 			keg_small_init(keg);
 	} else {
 		if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
 			keg_large_init(keg);
 		else
 			keg_small_init(keg);
 	}
 
 	/* Off-page kegs need a zone to allocate their slab headers from. */
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		if (keg->uk_flags & UMA_ZONE_REFCNT) {
 			if (keg->uk_ipers > uma_max_ipers_ref)
 				panic("Too many ref items per zone: %d > %d\n",
 				    keg->uk_ipers, uma_max_ipers_ref);
 			keg->uk_slabzone = slabrefzone;
 		} else
 			keg->uk_slabzone = slabzone;
 	}
 
 	/*
 	 * If we haven't booted yet we need allocations to go through the
 	 * startup cache until the vm is ready.
 	 */
 	if (keg->uk_ppera == 1) {
 #ifdef UMA_MD_SMALL_ALLOC
 		keg->uk_allocf = uma_small_alloc;
 		keg->uk_freef = uma_small_free;
 
 		if (booted < UMA_STARTUP)
 			keg->uk_allocf = startup_alloc;
 #else
 		if (booted < UMA_STARTUP2)
 			keg->uk_allocf = startup_alloc;
 #endif
 	} else if (booted < UMA_STARTUP2 &&
 	    (keg->uk_flags & UMA_ZFLAG_INTERNAL))
 		keg->uk_allocf = startup_alloc;
 
 	/*
 	 * Initialize keg's lock
 	 */
 	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * If we're putting the slab header in the actual page we need to
 	 * figure out where in each page it goes.  This calculates a right
 	 * justified offset into the memory on an ALIGN_PTR boundary.
 	 */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
 		u_int totsize;
 
 		/* Size of the slab struct and free list */
 		totsize = sizeof(struct uma_slab);
 
 		/* Size of the reference counts. */
 		if (keg->uk_flags & UMA_ZONE_REFCNT)
 			totsize += keg->uk_ipers * sizeof(uint32_t);
 
 		/* Round up to pointer alignment, then place at the end. */
 		if (totsize & UMA_ALIGN_PTR)
 			totsize = (totsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
 
 		/*
 		 * The only way the following is possible is if with our
 		 * UMA_ALIGN_PTR adjustments we are now bigger than
 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
 		 * mathematically possible for all cases, so we make
 		 * sure here anyway.
 		 */
 		totsize = keg->uk_pgoff + sizeof(struct uma_slab);
 		if (keg->uk_flags & UMA_ZONE_REFCNT)
 			totsize += keg->uk_ipers * sizeof(uint32_t);
 		if (totsize > PAGE_SIZE * keg->uk_ppera) {
 			printf("zone %s ipers %d rsize %d size %d\n",
 			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
 			    keg->uk_size);
 			panic("UMA slab won't fit.");
 		}
 	}
 
 	if (keg->uk_flags & UMA_ZONE_HASH)
 		hash_alloc(&keg->uk_hash);
 
 #ifdef UMA_DEBUG
 	printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
 	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
 	    keg->uk_ipers, keg->uk_ppera,
 	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
 #endif
 
 	/* The creating zone becomes the first entry on the keg's zone list. */
 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
 
 	/* Publish the keg on the global list under the write lock. */
 	rw_wlock(&uma_rwlock);
 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
 	rw_wunlock(&uma_rwlock);
 	return (0);
 }
 
 /*
  * Zone header ctor.  This initializes all fields, locks, etc.
  *
  * Three kinds of zone are handled: a pure cache zone (arg->import set),
  * a secondary zone attached to an existing keg, and a regular zone that
  * either creates its keg or constructs the one supplied in arg->keg.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	mem    The zone being constructed
  *	size   Number of bytes at mem that are zeroed before initialization
  *	udata  Actually uma_zctor_args
  *	flags  Passed on to keg_ctor() when a supplied keg is constructed
  *
  * Returns 0 on success, ENOMEM if a keg could not be created, or the
  * error reported by keg_ctor().
  */
 static int
 zone_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_zctor_args *arg = udata;
 	uma_zone_t zone = mem;
 	uma_zone_t z;
 	uma_keg_t keg;
 
 	bzero(zone, size);
 	zone->uz_name = arg->name;
 	zone->uz_ctor = arg->ctor;
 	zone->uz_dtor = arg->dtor;
 	zone->uz_slab = zone_fetch_slab;
 	zone->uz_init = NULL;
 	zone->uz_fini = NULL;
 	zone->uz_allocs = 0;
 	zone->uz_frees = 0;
 	zone->uz_fails = 0;
 	zone->uz_sleeps = 0;
 	zone->uz_count = 0;
 	zone->uz_count_min = 0;
 	zone->uz_flags = 0;
 	zone->uz_warning = NULL;
 	timevalclear(&zone->uz_ratecheck);
 	keg = arg->keg;
 
 	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * This is a pure cache zone, no kegs.
 	 */
 	if (arg->import) {
 		if (arg->flags & UMA_ZONE_VM)
 			arg->flags |= UMA_ZFLAG_CACHEONLY;
 		zone->uz_flags = arg->flags;
 		zone->uz_size = arg->size;
 		zone->uz_import = arg->import;
 		zone->uz_release = arg->release;
 		zone->uz_arg = arg->arg;
 		/* With no keg, the zone uses its own lock. */
 		zone->uz_lockptr = &zone->uz_lock;
 		rw_wlock(&uma_rwlock);
 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
 		rw_wunlock(&uma_rwlock);
 		goto out;
 	}
 
 	/*
 	 * Use the regular zone/keg/slab allocator.
 	 */
 	zone->uz_import = (uma_import)zone_import;
 	zone->uz_release = (uma_release)zone_release;
 	zone->uz_arg = zone; 
 
 	if (arg->flags & UMA_ZONE_SECONDARY) {
 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
 		zone->uz_init = arg->uminit;
 		zone->uz_fini = arg->fini;
 		zone->uz_lockptr = &keg->uk_lock;
 		zone->uz_flags |= UMA_ZONE_SECONDARY;
 		rw_wlock(&uma_rwlock);
 		ZONE_LOCK(zone);
 		/* Append after the last zone already on the keg's list. */
 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
 			if (LIST_NEXT(z, uz_link) == NULL) {
 				LIST_INSERT_AFTER(z, zone, uz_link);
 				break;
 			}
 		}
 		ZONE_UNLOCK(zone);
 		rw_wunlock(&uma_rwlock);
 	} else if (keg == NULL) {
 		/* No keg supplied: create one for this zone. */
 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
 		    arg->align, arg->flags)) == NULL)
 			return (ENOMEM);
 	} else {
 		struct uma_kctor_args karg;
 		int error;
 
 		/* We should only be here from uma_startup() */
 		karg.size = arg->size;
 		karg.uminit = arg->uminit;
 		karg.fini = arg->fini;
 		karg.align = arg->align;
 		karg.flags = arg->flags;
 		karg.zone = zone;
 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
 		    flags);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Link in the first keg.
 	 */
 	zone->uz_klink.kl_keg = keg;
 	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
 	zone->uz_lockptr = &keg->uk_lock;
 	zone->uz_size = keg->uk_size;
 	zone->uz_flags |= (keg->uk_flags &
 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
 
 	/*
 	 * Some internal zones don't have room allocated for the per cpu
 	 * caches.  If we're internal, bail out here.
 	 */
 	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
 		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
 		return (0);
 	}
 
 out:
 	/*
 	 * Choose the bucket size: BUCKET_MAX when UMA_ZONE_MAXBUCKET is set,
 	 * otherwise whatever bucket_select() picks for the item size.  The
 	 * initial value is also recorded as the minimum.
 	 */
 	if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
 		zone->uz_count = bucket_select(zone->uz_size);
 	else
 		zone->uz_count = BUCKET_MAX;
 	zone->uz_count_min = zone->uz_count;
 
 	return (0);
 }
 
 /*
  * Keg header dtor.  This frees all data, destroys locks, frees the hash
  * table and removes the keg from the global list.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	udata  unused
  */
 static void
 keg_dtor(void *arg, int size, void *udata)
 {
 	uma_keg_t keg;
 
 	keg = (uma_keg_t)arg;
 	KEG_LOCK(keg);
 	if (keg->uk_free != 0) {
 		printf("Freed UMA keg (%s) was not empty (%d items). "
 		    " Lost %d pages of memory.\n",
 		    keg->uk_name ? keg->uk_name : "",
 		    keg->uk_free, keg->uk_pages);
 	}
 	KEG_UNLOCK(keg);
 
 	hash_free(&keg->uk_hash);
 
 	KEG_LOCK_FINI(keg);
 }
 
 /*
  * Zone header dtor.  Drains the zone, removes it from the list it is
  * linked on, unlinks its kegs and, for a non-secondary zone, frees its
  * first keg.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	arg    The zone being destroyed
  *	size   unused
  *	udata  unused
  */
 static void
 zone_dtor(void *arg, int size, void *udata)
 {
 	uma_klink_t klink;
 	uma_zone_t zone;
 	uma_keg_t keg;
 
 	zone = (uma_zone_t)arg;
 	/* Look the keg up now; the klinks are cleared further down. */
 	keg = zone_first_keg(zone);
 
 	/* Only non-internal zones have their per-CPU caches drained here. */
 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
 		cache_drain(zone);
 
 	rw_wlock(&uma_rwlock);
 	LIST_REMOVE(zone, uz_link);
 	rw_wunlock(&uma_rwlock);
 	/*
 	 * XXX there are some races here where
 	 * the zone can be drained but zone lock
 	 * released and then refilled before we
 	 * remove it... we dont care for now
 	 */
 	zone_drain_wait(zone, M_WAITOK);
 	/*
 	 * Unlink all of our kegs.
 	 */
 	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
 		klink->kl_keg = NULL;
 		LIST_REMOVE(klink, kl_link);
 		/* The embedded klink is part of the zone and is not freed. */
 		if (klink == &zone->uz_klink)
 			continue;
 		free(klink, M_TEMP);
 	}
 	/*
 	 * We only destroy kegs from non secondary zones.
 	 */
 	if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
 		rw_wlock(&uma_rwlock);
 		LIST_REMOVE(keg, uk_link);
 		rw_wunlock(&uma_rwlock);
 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
 	}
 	ZONE_LOCK_FINI(zone);
 }
 
 /*
  * Traverses every zone in the system and calls a callback
  *
  * Arguments:
  *	zfunc  A pointer to a function which accepts a zone
  *		as an argument.
  *
  * Returns:
  *	Nothing
  */
 static void
 zone_foreach(void (*zfunc)(uma_zone_t))
 {
 	uma_keg_t keg;
 	uma_zone_t zone;
 
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
 			zfunc(zone);
 	}
 	rw_runlock(&uma_rwlock);
 }
 
 /* Public functions */
 /* See uma.h */
 /*
  * First stage of UMA initialization: builds the keg and zone header
  * zones by hand, seeds the boot page list from bootmem, creates the
  * slab, refcnt-slab and hash zones, and sets booted to UMA_STARTUP.
  *
  * Arguments:
  *	bootmem     Start of the memory carved into boot pages
  *	boot_pages  Number of UMA_SLAB_SIZE pieces available at bootmem
  */
 void
 uma_startup(void *bootmem, int boot_pages)
 {
 	struct uma_zctor_args args;
 	uma_slab_t slab;
 	u_int slabsize;
 	int i;
 
 #ifdef UMA_DEBUG
 	printf("Creating uma keg headers zone and keg.\n");
 #endif
 	rw_init(&uma_rwlock, "UMA lock");
 
 	/* "manually" create the initial zone */
 	memset(&args, 0, sizeof(args));
 	args.name = "UMA Kegs";
 	args.size = sizeof(struct uma_keg);
 	args.ctor = keg_ctor;
 	args.dtor = keg_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = &masterkeg;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	/* The initial zone has no Per cpu queues so it's smaller */
 	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
 
 #ifdef UMA_DEBUG
 	printf("Filling boot free list.\n");
 #endif
 	/*
 	 * Each boot page holds its own slab header at its start and is
 	 * marked UMA_SLAB_BOOT.
 	 */
 	for (i = 0; i < boot_pages; i++) {
 		slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
 		slab->us_data = (uint8_t *)slab;
 		slab->us_flags = UMA_SLAB_BOOT;
 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
 	}
 	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
 
 #ifdef UMA_DEBUG
 	printf("Creating uma zone headers zone and keg.\n");
 #endif
 	/* Items of this zone are zone headers plus their per-CPU caches. */
 	args.name = "UMA Zones";
 	args.size = sizeof(struct uma_zone) +
-	    (sizeof(struct uma_cache) * (mp_maxid + 1));
+	    (sizeof(struct uma_cache) * (mp_maxid));
 	args.ctor = zone_ctor;
 	args.dtor = zone_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = NULL;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	/* The initial zone has no Per cpu queues so it's smaller */
 	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
 
 #ifdef UMA_DEBUG
 	printf("Creating slab and hash zones.\n");
 #endif
 
 	/* Now make a zone for slab headers */
 	slabzone = uma_zcreate("UMA Slabs",
 				sizeof(struct uma_slab),
 				NULL, NULL, NULL, NULL,
 				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	/*
 	 * We also create a zone for the bigger slabs with reference
 	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
 	 */
 	slabsize = sizeof(struct uma_slab_refcnt);
 	slabsize += uma_max_ipers_ref * sizeof(uint32_t);
 	slabrefzone = uma_zcreate("UMA RCntSlabs",
 				  slabsize,
 				  NULL, NULL, NULL, NULL,
 				  UMA_ALIGN_PTR,
 				  UMA_ZFLAG_INTERNAL);
 
 	/* Zone for hash tables of the initial size (see hash_free()). */
 	hashzone = uma_zcreate("UMA Hash",
 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 	    NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	bucket_init();
 
 	booted = UMA_STARTUP;
 
 #ifdef UMA_DEBUG
 	printf("UMA startup complete.\n");
 #endif
 }
 
 /*
  * see uma.h
  *
  * Second startup stage: advances booted to UMA_STARTUP2, enables the
  * buckets and initializes uma_drain_lock.
  */
 void
 uma_startup2(void)
 {
 	booted = UMA_STARTUP2;
 	bucket_enable();
 	/* uma_zcreate() and friends take this lock once booted >= STARTUP2. */
 	sx_init(&uma_drain_lock, "umadrain");
 #ifdef UMA_DEBUG
 	printf("UMA startup2 complete.\n");
 #endif
 }
 
 /*
  * Initialize our callout handle and schedule uma_timeout() to run with a
  * delay of UMA_TIMEOUT * hz.
  */
 
 static void
 uma_startup3(void)
 {
 #ifdef UMA_DEBUG
 	printf("Starting callout.\n");
 #endif
 	callout_init(&uma_callout, CALLOUT_MPSAFE);
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 #ifdef UMA_DEBUG
 	printf("UMA startup3 complete.\n");
 #endif
 }
 
 /*
  * Create a keg for a zone by allocating it from the kegs zone.  The
  * arguments are packed into a uma_kctor_args for the keg ctor; an align
  * of UMA_ALIGN_CACHE is replaced by the current uma_align_cache.
  */
 static uma_keg_t
 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
 		int align, uint32_t flags)
 {
 	struct uma_kctor_args args;
 
 	args.zone = zone;
 	args.size = size;
 	args.uminit = uminit;
 	args.fini = fini;
 	args.flags = flags;
 	if (align == UMA_ALIGN_CACHE)
 		args.align = uma_align_cache;
 	else
 		args.align = align;
 
 	return (zone_alloc_item(kegs, &args, M_WAITOK));
 }
 
 /*
  * See uma.h
  *
  * Records align as the cache alignment.  The placeholder value
  * UMA_ALIGN_CACHE itself is ignored.
  */
 void
 uma_set_align(int align)
 {
 
 	if (align == UMA_ALIGN_CACHE)
 		return;
 	uma_align_cache = align;
 }
 
 /*
  * See uma.h
  *
  * Creates a zone that gets its own keg: the parameters are packed into a
  * uma_zctor_args with no keg set and the zone is allocated from the zones
  * zone.  Once booted has reached UMA_STARTUP2 the allocation is done with
  * uma_drain_lock share-locked.
  */
 uma_zone_t
 uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
 
 {
 	struct uma_zctor_args args;
 	uma_zone_t res;
 	bool locked;
 
 	/* This stuff is essential for the zone ctor */
 	memset(&args, 0, sizeof(args));
 	args.name = name;
 	args.size = size;
 	args.align = align;
 	args.flags = flags;
 	args.ctor = ctor;
 	args.dtor = dtor;
 	args.uminit = uminit;
 	args.fini = fini;
 	args.keg = NULL;
 
 	/* The drain lock is only used from UMA_STARTUP2 onwards. */
 	locked = (booted >= UMA_STARTUP2);
 	if (locked)
 		sx_slock(&uma_drain_lock);
 	res = zone_alloc_item(zones, &args, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_drain_lock);
 
 	return (res);
 }
 
 /*
  * See uma.h.
  *
  * Create a secondary zone on top of the first keg of 'master'.  Size,
  * alignment and flags are taken from that keg (with UMA_ZONE_SECONDARY
  * added); ctor/dtor/init/fini come from the caller.  The locking rule is
  * the same as in uma_zcreate(): uma_drain_lock is held shared only once
  * 'booted' has reached UMA_STARTUP2.
  */
 uma_zone_t
 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
 {
 	struct uma_zctor_args zargs;
 	uma_keg_t mkeg;
 	uma_zone_t newzone;
 	bool need_lock;
 
 	mkeg = zone_first_keg(master);
 
 	memset(&zargs, 0, sizeof(zargs));
 	zargs.name = name;
 	zargs.ctor = ctor;
 	zargs.dtor = dtor;
 	zargs.uminit = zinit;
 	zargs.fini = zfini;
 	zargs.size = mkeg->uk_size;
 	zargs.align = mkeg->uk_align;
 	zargs.flags = mkeg->uk_flags | UMA_ZONE_SECONDARY;
 	zargs.keg = mkeg;
 
 	need_lock = (booted >= UMA_STARTUP2);
 	if (need_lock)
 		sx_slock(&uma_drain_lock);
 	/* XXX Attaches only one keg of potentially many. */
 	newzone = zone_alloc_item(zones, &zargs, M_WAITOK);
 	if (need_lock)
 		sx_sunlock(&uma_drain_lock);
 
 	return (newzone);
 }
 
 /*
  * See uma.h.
  *
  * Create a zone whose items are obtained and returned through the
  * caller-supplied 'zimport' and 'zrelease' callbacks, with 'arg' recorded
  * alongside them.  No keg is supplied and the alignment is left at 0.
  */
 uma_zone_t
 uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_import zimport,
 		    uma_release zrelease, void *arg, int flags)
 {
 	struct uma_zctor_args zargs;
 
 	memset(&zargs, 0, sizeof(zargs));
 
 	/* Identity and per-item hooks. */
 	zargs.name = name;
 	zargs.size = size;
 	zargs.align = 0;
 	zargs.flags = flags;
 	zargs.ctor = ctor;
 	zargs.dtor = dtor;
 	zargs.uminit = zinit;
 	zargs.fini = zfini;
 
 	/* Backend callbacks and their argument. */
 	zargs.import = zimport;
 	zargs.release = zrelease;
 	zargs.arg = arg;
 
 	return (zone_alloc_item(zones, &zargs, M_WAITOK));
 }
 
 /*
  * Lock two zones.  The zone with the lower address is always locked
  * first; the second lock is taken with MTX_DUPOK.
  */
 static void
 zone_lock_pair(uma_zone_t a, uma_zone_t b)
 {
 	uma_zone_t first, second;
 
 	if (a < b) {
 		first = a;
 		second = b;
 	} else {
 		first = b;
 		second = a;
 	}
 	ZONE_LOCK(first);
 	mtx_lock_flags(second->uz_lockptr, MTX_DUPOK);
 }
 
 /*
  * Release the locks of both zones, 'a' first and then 'b'.  Counterpart
  * of zone_lock_pair().
  */
 static void
 zone_unlock_pair(uma_zone_t a, uma_zone_t b)
 {
 
 	ZONE_UNLOCK(a);
 	ZONE_UNLOCK(b);
 }
 
 /*
  * Append the first keg of 'master' to the keg list of 'zone', turning
  * 'zone' into a multi-keg zone (UMA_ZFLAG_MULTI, slab lookup through
  * zone_fetch_slab_multi()).
  *
  * Returns:
  *	0       on success
  *	EINVAL  if 'zone' is not a vtoslab() secondary zone, if 'master'
  *	        does not use vtoslab(), or if the two zones disagree on
  *	        UMA_ZONE_REFCNT
  *	E2BIG   if the item sizes of the two zones differ
  */
 int
 uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
 {
 	uma_klink_t klink;
 	uma_klink_t kl;
 	int error;
 
 	error = 0;
 	/* Allocate the link up front, before any zone lock is taken. */
 	klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
 
 	zone_lock_pair(zone, master);
 	/*
 	 * zone must use vtoslab() to resolve objects and must already be
 	 * a secondary.
 	 */
 	if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
 	    != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The new master must also use vtoslab().  (This used to test
 	 * zone->uz_flags a second time, so the master was never checked.)
 	 */
 	if ((master->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * Both must either be refcnt, or not be refcnt.
 	 */
 	if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
 	    (master->uz_flags & UMA_ZONE_REFCNT)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The underlying object must be the same size.  rsize
 	 * may be different.
 	 */
 	if (master->uz_size != zone->uz_size) {
 		error = E2BIG;
 		goto out;
 	}
 	/*
 	 * Put it at the end of the list.
 	 */
 	klink->kl_keg = zone_first_keg(master);
 	LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
 		if (LIST_NEXT(kl, kl_link) == NULL) {
 			LIST_INSERT_AFTER(kl, klink, kl_link);
 			break;
 		}
 	}
 	/* The link is now owned by the zone; do not free it below. */
 	klink = NULL;
 	zone->uz_flags |= UMA_ZFLAG_MULTI;
 	zone->uz_slab = zone_fetch_slab_multi;
 
 out:
 	zone_unlock_pair(zone, master);
 	if (klink != NULL)
 		free(klink, M_TEMP);
 
 	return (error);
 }
 
 
 /*
  * See uma.h.
  *
  * Destroy a zone by freeing it back to the internal 'zones' zone with
  * SKIP_NONE, so zone_free_item() skips neither the dtor nor the fini of
  * 'zones'.  Runs under a shared hold of uma_drain_lock.
  */
 void
 uma_zdestroy(uma_zone_t zone)
 {
 
 	sx_slock(&uma_drain_lock);
 	zone_free_item(zones, zone, NULL, SKIP_NONE);
 	sx_sunlock(&uma_drain_lock);
 }
 
 /*
  * See uma.h.
  *
  * Allocate one item from 'zone'.
  *
  * The allocation is attempted, in order, from:
  *   1. the per-CPU alloc bucket,
  *   2. the per-CPU free bucket (swapped in as the alloc bucket),
  *   3. a full bucket from the zone's bucket list,
  *   4. a freshly filled bucket from zone_alloc_bucket(),
  *   5. a single item from zone_alloc_item().
  *
  * Arguments:
  *	zone   The zone to allocate from.
  *	udata  Passed through to the zone's ctor.
  *	flags  malloc(9)-style flags; M_WAITOK, M_ZERO and M_NOVM are
  *	       examined here or in the helpers called from here.
  *
  * Returns:
  *	The item, or NULL if the allocation or the zone's ctor failed.
  */
 void *
 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
 	void *item;
 	uma_cache_t cache;
 	uma_bucket_t bucket;
 	int lockfail;
 	int cpu;
 
 #if 0
 	/* XXX: FIX!! Do not enable this in CURRENT!! MarkM */
 	/* The entropy here is desirable, but the harvesting is expensive */
 	random_harvest(&(zone->uz_name), sizeof(void *), 1, RANDOM_UMA_ALLOC);
 #endif
 
 	/* This is the fast path allocation */
 #ifdef UMA_DEBUG_ALLOC_1
 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
 #endif
 	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
 	    zone->uz_name, flags);
 
 	if (flags & M_WAITOK) {
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
 	}
 #ifdef DEBUG_MEMGUARD
 	if (memguard_cmp_zone(zone)) {
 		item = memguard_alloc(zone->uz_size, flags);
 		if (item != NULL) {
 			/*
 			 * Avoid conflict with the use-after-free
 			 * protecting infrastructure from INVARIANTS.
 			 */
 			if (zone->uz_init != NULL &&
 			    zone->uz_init != mtrash_init &&
 			    zone->uz_init(item, zone->uz_size, flags) != 0) {
 				/* Do not leak the guarded allocation. */
 				memguard_free(item);
 				return (NULL);
 			}
 			if (zone->uz_ctor != NULL &&
 			    zone->uz_ctor != mtrash_ctor &&
 			    zone->uz_ctor(item, zone->uz_size, udata,
 			    flags) != 0) {
 				/*
 				 * Undo the init.  The zone may have no
 				 * fini at all, and mtrash_fini is skipped
 				 * for the same reason as in the memguard
 				 * path of uma_zfree_arg().
 				 */
 				if (zone->uz_fini != NULL &&
 				    zone->uz_fini != mtrash_fini)
 					zone->uz_fini(item, zone->uz_size);
 				memguard_free(item);
 				return (NULL);
 			}
 #if 0
 			/* XXX: FIX!! Do not enable this in CURRENT!! MarkM */
 			/* The entropy here is desirable, but the harvesting is expensive */
 			random_harvest(&item, sizeof(void *), 1, RANDOM_UMA_ALLOC);
 #endif
 			return (item);
 		}
 		/* This is unfortunate but should not be fatal. */
 	}
 #endif
 	/*
 	 * If possible, allocate from the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to allocate from
 	 * the current cache; when we re-acquire the critical section, we
 	 * must detect and handle migration if it has occurred.
 	 */
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zalloc_start:
 	bucket = cache->uc_allocbucket;
 	if (bucket != NULL && bucket->ub_cnt > 0) {
 		bucket->ub_cnt--;
 		item = bucket->ub_bucket[bucket->ub_cnt];
 #ifdef INVARIANTS
 		bucket->ub_bucket[bucket->ub_cnt] = NULL;
 #endif
 		KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
 		cache->uc_allocs++;
 		critical_exit();
 		if (zone->uz_ctor != NULL &&
 		    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
 			atomic_add_long(&zone->uz_fails, 1);
 			zone_free_item(zone, item, udata, SKIP_DTOR);
 			return (NULL);
 		}
 #ifdef INVARIANTS
 		uma_dbg_alloc(zone, NULL, item);
 #endif
 		if (flags & M_ZERO)
 			uma_zero_item(item, zone);
 #if 0
 		/* XXX: FIX!! Do not enable this in CURRENT!! MarkM */
 		/* The entropy here is desirable, but the harvesting is expensive */
 		random_harvest(&item, sizeof(void *), 1, RANDOM_UMA_ALLOC);
 #endif
 		return (item);
 	}
 
 	/*
 	 * We have run out of items in our alloc bucket.
 	 * See if we can switch with our free bucket.
 	 */
 	bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt > 0) {
 #ifdef UMA_DEBUG_ALLOC
 		printf("uma_zalloc: Swapping empty with alloc.\n");
 #endif
 		cache->uc_freebucket = cache->uc_allocbucket;
 		cache->uc_allocbucket = bucket;
 		goto zalloc_start;
 	}
 
 	/*
 	 * Discard any empty allocation bucket while we hold no locks.
 	 */
 	bucket = cache->uc_allocbucket;
 	cache->uc_allocbucket = NULL;
 	critical_exit();
 	if (bucket != NULL)
 		bucket_free(zone, bucket, udata);
 
 	/* Short-circuit for zones without buckets and low memory. */
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zalloc_item;
 
 	/*
 	 * Attempt to retrieve the item from the per-CPU cache has failed, so
 	 * we must go back to the zone.  This requires the zone lock, so we
 	 * must drop the critical section, then re-acquire it when we go back
 	 * to the cache.  Since the critical section is released, we may be
 	 * preempted or migrate.  As such, make sure not to maintain any
 	 * thread-local state specific to the cache from prior to releasing
 	 * the critical section.
 	 */
 	lockfail = 0;
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		lockfail = 1;
 	}
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 	/*
 	 * Since we have locked the zone we may as well send back our stats.
 	 */
 	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
 	atomic_add_long(&zone->uz_frees, cache->uc_frees);
 	cache->uc_allocs = 0;
 	cache->uc_frees = 0;
 
 	/* See if we lost the race to fill the cache. */
 	if (cache->uc_allocbucket != NULL) {
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 
 	/*
 	 * Check the zone's cache of buckets.
 	 */
 	if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zalloc_arg: Returning an empty bucket."));
 
 		LIST_REMOVE(bucket, ub_link);
 		cache->uc_allocbucket = bucket;
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	/*
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	if (lockfail && zone->uz_count < BUCKET_MAX)
 		zone->uz_count++;
 	ZONE_UNLOCK(zone);
 
 	/*
 	 * Now lets just fill a bucket and put it on the free list.  If that
 	 * works we'll restart the allocation from the beginning and it
 	 * will use the just filled bucket.
 	 */
 	bucket = zone_alloc_bucket(zone, udata, flags);
 	if (bucket != NULL) {
 		ZONE_LOCK(zone);
 		critical_enter();
 		cpu = curcpu;
 		cache = &zone->uz_cpu[cpu];
 		/*
 		 * See if we lost the race or were migrated.  Cache the
 		 * initialized bucket to make this less likely or claim
 		 * the memory directly.
 		 */
 		if (cache->uc_allocbucket == NULL)
 			cache->uc_allocbucket = bucket;
 		else
 			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 
 	/*
 	 * We may not be able to get a bucket so return an actual item.
 	 */
 #ifdef UMA_DEBUG
 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
 #endif
 
 zalloc_item:
 	item = zone_alloc_item(zone, udata, flags);
 
 #if 0
 	/* XXX: FIX!! Do not enable this in CURRENT!! MarkM */
 	/* The entropy here is desirable, but the harvesting is expensive */
 	random_harvest(&item, sizeof(void *), 1, RANDOM_UMA_ALLOC);
 #endif
 	return (item);
 }
 
 /*
  * Find or allocate a slab of 'keg' that has at least one free item.
  *
  * Must be called with the keg lock held (asserted below).  The slab that
  * is returned is on the keg's partially-used list.
  *
  * Arguments:
  *	keg    The keg to take a slab from.
  *	zone   The zone on whose behalf the slab is fetched; used for the
  *	       FULL flag, the sleep counter and the limit warning.
  *	flags  M_USE_RESERVE allows dipping into keg->uk_reserve, M_NOVM
  *	       forbids allocating a new slab, M_NOWAIT forbids sleeping
  *	       when the keg is at its page limit.
  *
  * Returns:
  *	A slab, or NULL if none could be obtained.
  */
 static uma_slab_t
 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
 {
 	uma_slab_t slab;
 	int reserve;
 
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	/* Items held back unless the caller passed M_USE_RESERVE. */
 	reserve = 0;
 	if ((flags & M_USE_RESERVE) == 0)
 		reserve = keg->uk_reserve;
 
 	for (;;) {
 		/*
 		 * Find a slab with some space.  Prefer slabs that are partially
 		 * used over those that are totally full.  This helps to reduce
 		 * fragmentation.
 		 */
 		if (keg->uk_free > reserve) {
 			if (!LIST_EMPTY(&keg->uk_part_slab)) {
 				slab = LIST_FIRST(&keg->uk_part_slab);
 			} else {
 				/* Promote a completely free slab to partial. */
 				slab = LIST_FIRST(&keg->uk_free_slab);
 				LIST_REMOVE(slab, us_link);
 				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
 				    us_link);
 			}
 			MPASS(slab->us_keg == keg);
 			return (slab);
 		}
 
 		/*
 		 * M_NOVM means don't ask at all!
 		 */
 		if (flags & M_NOVM)
 			break;
 
 		/* A uk_maxpages of 0 means no page limit is enforced here. */
 		if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
 			keg->uk_flags |= UMA_ZFLAG_FULL;
 			/*
 			 * If this is not a multi-zone, set the FULL bit.
 			 * Otherwise slab_multi() takes care of it.
 			 */
 			if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
 				zone->uz_flags |= UMA_ZFLAG_FULL;
 				zone_log_warning(zone);
 			}
 			if (flags & M_NOWAIT)
 				break;
 			zone->uz_sleeps++;
 			/* zone_release() issues the matching wakeup(keg). */
 			msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
 			continue;
 		}
 		slab = keg_alloc_slab(keg, zone, flags);
 		/*
 		 * If we got a slab here it's safe to mark it partially used
 		 * and return.  We assume that the caller is going to remove
 		 * at least one item.
 		 */
 		if (slab) {
 			MPASS(slab->us_keg == keg);
 			LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
 			return (slab);
 		}
 		/*
 		 * We might not have been able to get a slab but another cpu
 		 * could have while we were unlocked.  Check again before we
 		 * fail.
 		 */
 		flags |= M_NOVM;
 	}
 	return (slab);
 }
 
 /*
  * Fetch a slab from a single-keg zone.
  *
  * If 'keg' is NULL the zone's first keg is looked up and locked here;
  * otherwise the caller passes in a keg that is already locked.  On
  * success the slab is returned with the keg still locked.  On failure
  * (only possible with M_NOWAIT or M_NOVM) the keg is unlocked and NULL is
  * returned.
  */
 static uma_slab_t
 zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
 {
 	uma_slab_t slab;
 
 	if (keg == NULL) {
 		keg = zone_first_keg(zone);
 		KEG_LOCK(keg);
 	}
 
 	/* Keep retrying unless the caller said it cannot wait. */
 	while ((slab = keg_fetch_slab(keg, zone, flags)) == NULL) {
 		if (flags & (M_NOWAIT | M_NOVM)) {
 			KEG_UNLOCK(keg);
 			return (NULL);
 		}
 	}
 	return (slab);
 }
 
 /*
  * uma_zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
  * with the keg locked.  On NULL no lock is held.
  *
  * The last pointer is used to seed the search.  It is not required.  When
  * it is non-NULL it is passed straight to keg_fetch_slab(), which asserts
  * that the keg lock is held, and it is unlocked here if that attempt fails.
  *
  * rflags are the caller's flags; the first pass over the kegs always runs
  * with M_NOWAIT substituted for M_WAITOK.
  */
 static uma_slab_t
 zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
 {
 	uma_klink_t klink;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int flags;
 	int empty;
 	int full;
 
 	/*
 	 * Don't wait on the first pass.  This will skip limit tests
 	 * as well.  We don't want to block if we can find a provider
 	 * without blocking.
 	 */
 	flags = (rflags & ~M_WAITOK) | M_NOWAIT;
 	/*
 	 * Use the last slab allocated as a hint for where to start
 	 * the search.
 	 */
 	if (last != NULL) {
 		slab = keg_fetch_slab(last, zone, flags);
 		if (slab)
 			return (slab);
 		KEG_UNLOCK(last);
 	}
 	/*
 	 * Loop until we have a slab in case of transient failures
 	 * while M_WAITOK is specified.  I'm not sure this is 100%
 	 * required but we've done it for so long now.
 	 */
 	for (;;) {
 		/* Per-pass counts of kegs that are / are not flagged FULL. */
 		empty = 0;
 		full = 0;
 		/*
 		 * Search the available kegs for slabs.  Be careful to hold the
 		 * correct lock while calling into the keg layer.
 		 */
 		LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
 			keg = klink->kl_keg;
 			KEG_LOCK(keg);
 			if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
 				slab = keg_fetch_slab(keg, zone, flags);
 				if (slab)
 					return (slab);
 			}
 			/*
 			 * Re-test the flag: keg_fetch_slab() may have set
 			 * it during the call above.
 			 */
 			if (keg->uk_flags & UMA_ZFLAG_FULL)
 				full++;
 			else
 				empty++;
 			KEG_UNLOCK(keg);
 		}
 		if (rflags & (M_NOWAIT | M_NOVM))
 			break;
 		/* From the second pass on, use the caller's real flags. */
 		flags = rflags;
 		/*
 		 * All kegs are full.  XXX We can't atomically check all kegs
 		 * and sleep so just sleep for a short period and retry.
 		 */
 		if (full && !empty) {
 			ZONE_LOCK(zone);
 			zone->uz_flags |= UMA_ZFLAG_FULL;
 			zone->uz_sleeps++;
 			zone_log_warning(zone);
 			msleep(zone, zone->uz_lockptr, PVM,
 			    "zonelimit", hz/100);
 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
 			ZONE_UNLOCK(zone);
 			continue;
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Carve one item out of 'slab'.
  *
  * The first set bit in the slab's free bitmap selects the item; that bit
  * is cleared and the free counts of slab and keg are decremented.  A slab
  * that becomes completely used is moved onto the keg's full list.  The
  * keg lock must be held and the slab must belong to 'keg'.
  */
 static void *
 slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
 {
 	uint8_t idx;
 	void *ret;
 
 	MPASS(keg == slab->us_keg);
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 
 	/* Claim the lowest free index. */
 	idx = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
 	BIT_CLR(SLAB_SETSIZE, idx, &slab->us_free);
 	ret = slab->us_data + (keg->uk_rsize * idx);
 
 	/* Accounting. */
 	keg->uk_free--;
 	slab->us_freecount--;
 	if (slab->us_freecount != 0)
 		return (ret);
 
 	/* That was the last free item: move this slab to the full list. */
 	LIST_REMOVE(slab, us_link);
 	LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
 
 	return (ret);
 }
 
 /*
  * Fill 'bucket' with up to 'max' items taken from the zone's slabs.
  *
  * Slabs are obtained through zone->uz_slab(), which returns with the
  * slab's keg locked; that keg is passed back in on the next call and is
  * unlocked here once the loop is done.  After the first slab request the
  * flags are downgraded from M_WAITOK to M_NOWAIT.
  *
  * Returns the number of items stored in 'bucket'.
  */
 static int
 zone_import(uma_zone_t zone, void **bucket, int max, int flags)
 {
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int filled;
 
 	slab = NULL;
 	keg = NULL;
 	filled = 0;
 	/* Try to keep the buckets totally full */
 	while (filled < max) {
 		slab = zone->uz_slab(zone, keg, flags);
 		if (slab == NULL)
 			break;
 		keg = slab->us_keg;
 		while (slab->us_freecount != 0 && filled < max) {
 			bucket[filled] = slab_alloc_item(keg, slab);
 			filled++;
 			/* Stop draining once only the reserve is left. */
 			if (keg->uk_free <= keg->uk_reserve)
 				break;
 		}
 		/* Don't grab more than one slab at a time. */
 		flags = (flags & ~M_WAITOK) | M_NOWAIT;
 	}
 	if (slab != NULL)
 		KEG_UNLOCK(keg);
 
 	return (filled);
 }
 
 /*
  * Allocate a bucket and fill it with initialized items.
  *
  * The bucket itself is always allocated with M_NOWAIT (keeping only the
  * caller's M_NOVM).  Items come from zone->uz_import(); if the zone has an
  * init routine it is run on each item, and the items from the first
  * failing one onwards are handed back through zone->uz_release().
  *
  * Returns the bucket, or NULL (bumping zone->uz_fails) if it would hold
  * no items.  NULL is also returned when no bucket could be allocated.
  */
 static uma_bucket_t
 zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
 {
 	uma_bucket_t bucket;
 	int limit;
 	int i;
 
 	/* Don't wait for buckets, preserve caller's NOVM setting. */
 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
 	if (bucket == NULL)
 		return (NULL);
 
 	limit = MIN(bucket->ub_entries, zone->uz_count);
 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
 	    limit, flags);
 
 	/*
 	 * Initialize the memory if necessary.
 	 */
 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
 		i = 0;
 		while (i < bucket->ub_cnt &&
 		    zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
 		    flags) == 0)
 			i++;
 		/*
 		 * If we couldn't initialize the whole bucket, put the
 		 * rest back onto the freelist.
 		 */
 		if (i != bucket->ub_cnt) {
 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
 			    bucket->ub_cnt - i);
 #ifdef INVARIANTS
 			bzero(&bucket->ub_bucket[i],
 			    sizeof(void *) * (bucket->ub_cnt - i));
 #endif
 			bucket->ub_cnt = i;
 		}
 	}
 
 	if (bucket->ub_cnt != 0)
 		return (bucket);
 
 	/* Nothing usable ended up in the bucket. */
 	bucket_free(zone, bucket, udata);
 	atomic_add_long(&zone->uz_fails, 1);
 	return (NULL);
 }
 
 /*
  * Allocates a single item from a zone, bypassing the per-CPU caches.
  *
  * Arguments
  *	zone   The zone to alloc for.
  *	udata  The data to be passed to the constructor.
  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
  *
  * Returns
  *	NULL if the import, the zone's init or the zone's ctor failed
  *	(zone->uz_fails is incremented in each case)
  *	An item if successful
  */
 
 static void *
 zone_alloc_item(uma_zone_t zone, void *udata, int flags)
 {
 	void *item = NULL;
 
 #ifdef UMA_DEBUG_ALLOC
 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
 #endif
 	if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
 		goto fail;
 	atomic_add_long(&zone->uz_allocs, 1);
 
 	/*
 	 * The item goes straight from a keg slab to the user, who expects
 	 * it to be zone-init'd (not keg-init'd) as well as zone-ctor'd, so
 	 * both hooks are run here.
 	 */
 	if (zone->uz_init != NULL &&
 	    zone->uz_init(item, zone->uz_size, flags) != 0) {
 		zone_free_item(zone, item, udata, SKIP_FINI);
 		goto fail;
 	}
 	if (zone->uz_ctor != NULL &&
 	    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
 		zone_free_item(zone, item, udata, SKIP_DTOR);
 		goto fail;
 	}
 #ifdef INVARIANTS
 	uma_dbg_alloc(zone, NULL, item);
 #endif
 	if (flags & M_ZERO)
 		uma_zero_item(item, zone);
 
 	return (item);
 
 fail:
 	atomic_add_long(&zone->uz_fails, 1);
 	return (NULL);
 }
 
 /*
  * See uma.h.
  *
  * Free 'item' back to 'zone'.  A NULL item is ignored.
  *
  * The zone's dtor is run first.  The item is then placed, in order of
  * preference, into the per-CPU alloc bucket, the per-CPU free bucket, or
  * (after pushing a full free bucket onto the zone's bucket list) a newly
  * allocated free bucket.  If the zone is flagged FULL, has no buckets,
  * buckets are disabled, or no bucket can be allocated, the item is
  * released directly through zone_free_item().
  *
  * Arguments:
  *	zone   The zone the item belongs to.
  *	item   The item to free; may be NULL.
  *	udata  Passed through to the zone's dtor.
  */
 void
 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 {
 	uma_cache_t cache;
 	uma_bucket_t bucket;
 	int lockfail;
 	int cpu;
 
 #if 0
 	/* XXX: FIX!! Do not enable this in CURRENT!! MarkM */
 	/* The entropy here is desirable, but the harvesting is expensive */
 	struct entropy {
 		const void *uz_name;
 		const void *item;
 	} entropy;
 
 	entropy.uz_name = zone->uz_name;
 	entropy.item = item;
 	random_harvest(&entropy, sizeof(struct entropy), 2, RANDOM_UMA_ALLOC);
 #endif
 
 #ifdef UMA_DEBUG_ALLOC_1
 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
 #endif
 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
 	    zone->uz_name);
 
         /* uma_zfree(..., NULL) does nothing, to match free(9). */
         if (item == NULL)
                 return;
 #ifdef DEBUG_MEMGUARD
 	/*
 	 * Memguard-backed items never enter the buckets; run the hooks
 	 * (except the mtrash ones) and hand the memory back to memguard.
 	 */
 	if (is_memguard_addr(item)) {
 		if (zone->uz_dtor != NULL && zone->uz_dtor != mtrash_dtor)
 			zone->uz_dtor(item, zone->uz_size, udata);
 		if (zone->uz_fini != NULL && zone->uz_fini != mtrash_fini)
 			zone->uz_fini(item, zone->uz_size);
 		memguard_free(item);
 		return;
 	}
 #endif
 #ifdef INVARIANTS
 	if (zone->uz_flags & UMA_ZONE_MALLOC)
 		uma_dbg_free(zone, udata, item);
 	else
 		uma_dbg_free(zone, NULL, item);
 #endif
 	/* The dtor runs here, so every later path frees with SKIP_DTOR. */
 	if (zone->uz_dtor != NULL)
 		zone->uz_dtor(item, zone->uz_size, udata);
 
 	/*
 	 * The race here is acceptable.  If we miss it we'll just have to wait
 	 * a little longer for the limits to be reset.
 	 */
 	if (zone->uz_flags & UMA_ZFLAG_FULL)
 		goto zfree_item;
 
 	/*
 	 * If possible, free to the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to free to the
 	 * current cache; when we re-acquire the critical section, we must
 	 * detect and handle migration if it has occurred.
 	 */
 zfree_restart:
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zfree_start:
 	/*
 	 * Try to free into the allocbucket first to give LIFO ordering
 	 * for cache-hot datastructures.  Spill over into the freebucket
 	 * if necessary.  Alloc will swap them if one runs dry.
 	 */
 	bucket = cache->uc_allocbucket;
 	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
 		bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
 		    ("uma_zfree: Freeing to non free bucket index."));
 		bucket->ub_bucket[bucket->ub_cnt] = item;
 		bucket->ub_cnt++;
 		cache->uc_frees++;
 		critical_exit();
 		return;
 	}
 
 	/*
 	 * We must go back the zone, which requires acquiring the zone lock,
 	 * which in turn means we must release and re-acquire the critical
 	 * section.  Since the critical section is released, we may be
 	 * preempted or migrate.  As such, make sure not to maintain any
 	 * thread-local state specific to the cache from prior to releasing
 	 * the critical section.
 	 */
 	critical_exit();
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zfree_item;
 
 	lockfail = 0;
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		lockfail = 1;
 	}
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 	/*
 	 * Since we have locked the zone we may as well send back our stats.
 	 */
 	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
 	atomic_add_long(&zone->uz_frees, cache->uc_frees);
 	cache->uc_allocs = 0;
 	cache->uc_frees = 0;
 
 	/* The free bucket of the CPU we are on now may have room. */
 	bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 		ZONE_UNLOCK(zone);
 		goto zfree_start;
 	}
 	cache->uc_freebucket = NULL;
 
 	/* Can we throw this on the zone full list? */
 	if (bucket != NULL) {
 #ifdef UMA_DEBUG_ALLOC
 		printf("uma_zfree: Putting old bucket on the free list.\n");
 #endif
 		/* ub_cnt is pointing to the last free item */
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
 		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
 	}
 
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	/*
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	if (lockfail && zone->uz_count < BUCKET_MAX)
 		zone->uz_count++;
 	ZONE_UNLOCK(zone);
 
 #ifdef UMA_DEBUG_ALLOC
 	printf("uma_zfree: Allocating new free bucket.\n");
 #endif
 	bucket = bucket_alloc(zone, udata, M_NOWAIT);
 	if (bucket) {
 		critical_enter();
 		cpu = curcpu;
 		cache = &zone->uz_cpu[cpu];
 		if (cache->uc_freebucket == NULL) {
 			cache->uc_freebucket = bucket;
 			goto zfree_start;
 		}
 		/*
 		 * We lost the race, start over.  We have to drop our
 		 * critical section to free the bucket.
 		 */
 		critical_exit();
 		bucket_free(zone, bucket, udata);
 		goto zfree_restart;
 	}
 
 	/*
 	 * If nothing else caught this, we'll just do an internal free.
 	 */
 zfree_item:
 	zone_free_item(zone, item, udata, SKIP_DTOR);
 
 	return;
 }
 
 /*
  * Return 'item' to 'slab'.
  *
  * The slab is moved between the keg's lists as needed: onto the free list
  * when this item makes it entirely free, or from the full list onto the
  * partial list when it had no free items before.  The item's bit is then
  * set in the free bitmap and the free counts of slab and keg are bumped.
  * The keg lock must be held and the slab must belong to 'keg'.
  */
 static void
 slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
 {
 	uint8_t idx;
 
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	MPASS(keg == slab->us_keg);
 
 	/* Relink the slab if this free changes which list it belongs on. */
 	if (slab->us_freecount + 1 == keg->uk_ipers) {
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
 	} else if (slab->us_freecount == 0) {
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
 	}
 
 	/* Mark the item's slot as free again. */
 	idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 	BIT_SET(SLAB_SETSIZE, idx, &slab->us_free);
 
 	/* Slab and keg accounting. */
 	slab->us_freecount++;
 	keg->uk_free++;
 }
 
 /*
  * Return 'cnt' items from 'bucket' to their slabs.
  *
  * For each item the owning slab is located (by hash lookup, by the slab
  * header offset within the item's slab-aligned memory, or through
  * vtoslab()) and the item is handed to slab_free_item() with the owning
  * keg locked.  If a keg that was flagged FULL drops below its page limit,
  * the flag is cleared on the keg and, after the loop, on the zone, and
  * sleepers on both are woken.
  */
 static void
 zone_release(uma_zone_t zone, void **bucket, int cnt)
 {
 	void *item;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *mem;
 	int clearfull;
 	int i;
 
 	clearfull = 0;
 	keg = zone_first_keg(zone);
 	KEG_LOCK(keg);
 	for (i = 0; i < cnt; i++) {
 		item = bucket[i];
 		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
 			/* Round the item address down to its slab boundary. */
 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 			if (zone->uz_flags & UMA_ZONE_HASH) {
 				slab = hash_sfind(&keg->uk_hash, mem);
 			} else {
 				mem += keg->uk_pgoff;
 				slab = (uma_slab_t)mem;
 			}
 		} else {
 			slab = vtoslab((vm_offset_t)item);
 			/*
 			 * The item may belong to a different keg than the
 			 * one currently locked; switch locks if so.
 			 */
 			if (slab->us_keg != keg) {
 				KEG_UNLOCK(keg);
 				keg = slab->us_keg;
 				KEG_LOCK(keg);
 			}
 		}
 		slab_free_item(keg, slab, item);
 		if (keg->uk_flags & UMA_ZFLAG_FULL) {
 			if (keg->uk_pages < keg->uk_maxpages) {
 				keg->uk_flags &= ~UMA_ZFLAG_FULL;
 				clearfull = 1;
 			}
 
 			/* 
 			 * We can handle one more allocation. Since we're
 			 * clearing ZFLAG_FULL, wake up all procs blocked
 			 * on pages. This should be uncommon, so keeping this
 			 * simple for now (rather than adding count of blocked 
 			 * threads etc).
 			 */
 			wakeup(keg);
 		}
 	}
 	KEG_UNLOCK(keg);
 	/* The zone flag is cleared under the zone lock, after the keg lock is dropped. */
 	if (clearfull) {
 		ZONE_LOCK(zone);
 		zone->uz_flags &= ~UMA_ZFLAG_FULL;
 		wakeup(zone);
 		ZONE_UNLOCK(zone);
 	}
 
 }
 
 /*
  * Frees a single item to any zone, bypassing the per-CPU caches.
  *
  * Arguments:
  *	zone   The zone to free to
  *	item   The item we're freeing
  *	udata  User supplied data for the dtor
  *	skip   Which teardown steps to skip: the dtor runs only when skip is
  *	       below SKIP_DTOR, the fini only when it is below SKIP_FINI
  */
 static void
 zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
 {
 
 #ifdef INVARIANTS
 	if (skip == SKIP_NONE) {
 		uma_dbg_free(zone,
 		    (zone->uz_flags & UMA_ZONE_MALLOC) ? udata : NULL, item);
 	}
 #endif
 	if (zone->uz_dtor != NULL && skip < SKIP_DTOR)
 		zone->uz_dtor(item, zone->uz_size, udata);
 
 	if (zone->uz_fini != NULL && skip < SKIP_FINI)
 		zone->uz_fini(item, zone->uz_size);
 
 	/* Account for the free, then hand the item to the backend. */
 	atomic_add_long(&zone->uz_frees, 1);
 	zone->uz_release(zone->uz_arg, &item, 1);
 }
 
 /*
  * See uma.h.
  *
  * Limit the first keg of 'zone' so that it can hold at least 'nitems'
  * items.  The request is rounded up to whole slabs; uk_maxpages is stored
  * in pages, i.e. slabs * uk_ppera.
  *
  * The previous code compared uk_maxpages * uk_ipers against nitems and
  * returned that product, which treats a page count as a slab count.  For
  * kegs with uk_ppera > 1 this could skip a needed round-up and reported
  * uk_ppera times the true capacity.  For uk_ppera == 1 the result is
  * unchanged.
  *
  * Returns the effective item limit, or 0 if the zone has no keg.
  */
 int
 uma_zone_set_max(uma_zone_t zone, int nitems)
 {
 	uma_keg_t keg;
 	int slabs;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	KEG_LOCK(keg);
 	/* Number of slabs needed, rounded up. */
 	slabs = nitems / keg->uk_ipers;
 	if (slabs * keg->uk_ipers < nitems)
 		slabs++;
 	keg->uk_maxpages = slabs * keg->uk_ppera;
 	nitems = slabs * keg->uk_ipers;
 	KEG_UNLOCK(keg);
 
 	return (nitems);
 }
 
 /*
  * See uma.h.
  *
  * Report the item limit of the first keg of 'zone'.
  *
  * uk_maxpages is kept in pages (uma_zone_set_max() stores slabs *
  * uk_ppera), so it is converted back to slabs before multiplying by the
  * items per slab.  The previous code multiplied the page count directly
  * and over-reported by a factor of uk_ppera for multi-page slabs.
  *
  * Returns the limit in items; 0 if the zone has no keg or no limit is set.
  */
 int
 uma_zone_get_max(uma_zone_t zone)
 {
 	int nitems;
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	KEG_LOCK(keg);
 	nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
 	KEG_UNLOCK(keg);
 
 	return (nitems);
 }
 
 /*
  * See uma.h.
  *
  * Store 'warning' in zone->uz_warning under the zone lock.  Only the
  * pointer is stored; the string is not copied.
  */
 void
 uma_zone_set_warning(uma_zone_t zone, const char *warning)
 {
 
 	ZONE_LOCK(zone);
 	zone->uz_warning = warning;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h.
  *
  * Estimate the number of items currently allocated from 'zone': the zone
  * level allocs minus frees, plus the not-yet-flushed per-CPU counters.
  * A negative total is reported as 0.
  */
 int
 uma_zone_get_cur(uma_zone_t zone)
 {
 	int64_t outstanding;
 	u_int cpu;
 
 	ZONE_LOCK(zone);
 	outstanding = zone->uz_allocs - zone->uz_frees;
 	/*
 	 * See the comment in sysctl_vm_zone_stats() regarding the safety
 	 * of accessing the per-cpu caches. With the zone lock held, it is
 	 * safe, but can potentially result in stale data.
 	 */
 	CPU_FOREACH(cpu) {
 		outstanding += zone->uz_cpu[cpu].uc_allocs -
 		    zone->uz_cpu[cpu].uc_frees;
 	}
 	ZONE_UNLOCK(zone);
 
 	if (outstanding < 0)
 		return (0);
 	return (outstanding);
 }
 
 /*
  * See uma.h.
  *
  * Replace the init routine of the zone's first keg.  The keg must exist
  * and must not own any pages yet (both asserted).
  */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
 	KEG_LOCK(keg);
 	KASSERT(keg->uk_pages == 0,
 	    ("uma_zone_set_init on non-empty keg"));
 	keg->uk_init = uminit;
 	KEG_UNLOCK(keg);
 }
 
 /*
  * See uma.h.
  *
  * Replace the fini routine of the zone's first keg.  The keg must exist
  * and must not own any pages yet (both asserted).
  */
 void
 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
 	KEG_LOCK(keg);
 	KASSERT(keg->uk_pages == 0,
 	    ("uma_zone_set_fini on non-empty keg"));
 	keg->uk_fini = fini;
 	KEG_UNLOCK(keg);
 }
 
 /*
  * See uma.h.
  *
  * Replace the zone-level init routine (zone->uz_init) under the zone
  * lock.  Asserts that the zone's first keg owns no pages yet.
  */
 void
 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
 {
 
 	ZONE_LOCK(zone);
 	KASSERT(zone_first_keg(zone)->uk_pages == 0,
 	    ("uma_zone_set_zinit on non-empty keg"));
 	zone->uz_init = zinit;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h.
  *
  * Replace the zone-level fini routine (zone->uz_fini) under the zone
  * lock.  Asserts that the zone's first keg owns no pages yet.
  */
 void
 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
 {
 
 	ZONE_LOCK(zone);
 	KASSERT(zone_first_keg(zone)->uk_pages == 0,
 	    ("uma_zone_set_zfini on non-empty keg"));
 	zone->uz_fini = zfini;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h.
  *
  * Replace the page-free routine (uk_freef) of the zone's first keg.  The
  * keg must exist (asserted).
  */
 /* XXX uk_freef is not actually used with the zone locked */
 void
 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
 	KEG_LOCK(keg);
 	keg->uk_freef = freef;
 	KEG_UNLOCK(keg);
 }
 
 /*
  * See uma.h.
  *
  * Replace the page-allocation routine (uk_allocf) of the zone's first
  * keg.  The keg must exist; this is now asserted, as in
  * uma_zone_set_freef(), instead of dereferencing a NULL keg.
  */
 /* XXX uk_allocf is not actually used with the zone locked */
 void
 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KASSERT(keg != NULL, ("uma_zone_set_allocf: Invalid zone type"));
 	KEG_LOCK(keg);
 	keg->uk_allocf = allocf;
 	KEG_UNLOCK(keg);
 }
 
 /*
  * See uma.h.
  *
  * Set the number of items the zone's first keg holds in reserve
  * (uk_reserve).  Zones without a keg are left untouched.
  */
 void
 uma_zone_reserve(uma_zone_t zone, int items)
 {
 	uma_keg_t keg = zone_first_keg(zone);
 
 	if (keg != NULL) {
 		KEG_LOCK(keg);
 		keg->uk_reserve = items;
 		KEG_UNLOCK(keg);
 	}
 }
 
 /*
  * See uma.h.
  *
  * Reserve kernel virtual address space for 'count' items in the zone's
  * first keg, cap the keg at that many pages and mark it
  * UMA_ZONE_NOFREE.
  *
  * The slab count needed for 'count' items is converted to pages by
  * multiplying with uk_ppera, matching how uma_zone_set_max() computes
  * uk_maxpages.  Previously the slab count itself was used as a page
  * count, so kegs with multi-page slabs got too little KVA and too small
  * a limit.  The size passed to kva_alloc() is also widened to vm_size_t
  * before multiplying.
  *
  * Returns 1 on success, 0 if the zone has no keg or KVA allocation fails.
  */
 int
 uma_zone_reserve_kva(uma_zone_t zone, int count)
 {
 	uma_keg_t keg;
 	vm_offset_t kva;
 	int pages;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	/* Slabs needed for 'count' items, rounded up... */
 	pages = count / keg->uk_ipers;
 
 	if (pages * keg->uk_ipers < count)
 		pages++;
 	/* ...then converted to pages. */
 	pages *= keg->uk_ppera;
 
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera > 1) {
 #else
 	if (1) {
 #endif
 		kva = kva_alloc((vm_size_t)pages * UMA_SLAB_SIZE);
 		if (kva == 0)
 			return (0);
 	} else
 		kva = 0;
 	KEG_LOCK(keg);
 	keg->uk_kva = kva;
 	keg->uk_offset = 0;
 	keg->uk_maxpages = pages;
 #ifdef UMA_MD_SMALL_ALLOC
 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
 #else
 	keg->uk_allocf = noobj_alloc;
 #endif
 	keg->uk_flags |= UMA_ZONE_NOFREE;
 	KEG_UNLOCK(keg);
 
 	return (1);
 }
 
 /*
  * See uma.h.
  *
  * Pre-populate the zone's first keg with enough slabs for 'items' items
  * (rounded up to whole slabs).  Each new slab is put on the keg's free
  * slab list.  Stops early if a slab allocation fails; does nothing for a
  * zone without a keg.
  */
 void
 uma_prealloc(uma_zone_t zone, int items)
 {
 	uma_keg_t keg;
 	uma_slab_t slab;
 	int remaining;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return;
 
 	KEG_LOCK(keg);
 	remaining = items / keg->uk_ipers;
 	if (remaining * keg->uk_ipers < items)
 		remaining++;
 	for (; remaining > 0; remaining--) {
 		slab = keg_alloc_slab(keg, zone, M_WAITOK);
 		if (slab == NULL)
 			break;
 		MPASS(slab->us_keg == keg);
 		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
 	}
 	KEG_UNLOCK(keg);
 }
 
 /*
  * See uma.h.
  *
  * Return a pointer to the reference counter that belongs to 'item'.  The
  * slab is found through vtoslab() on the item's slab-aligned address and
  * the counter is selected by the item's index within that slab.  The
  * 'zone' argument is not consulted.
  */
 uint32_t *
 uma_find_refcnt(uma_zone_t zone, void *item)
 {
 	uma_slabrefcnt_t refslab;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int idx;
 
 	slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
 	keg = slab->us_keg;
 	KASSERT(keg->uk_flags & UMA_ZONE_REFCNT,
 	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
 
 	/* The same slab, viewed as its refcount-carrying variant. */
 	refslab = (uma_slabrefcnt_t)slab;
 	idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 
 	return (&refslab->us_refcnt[idx]);
 }
 
 /*
  * See uma.h.
  *
  * Release cached memory.  Runs with uma_drain_lock held exclusively:
  * re-evaluates bucket use, drains every zone, and, if vm_page_count_min()
  * still reports a shortage, drains the per-CPU caches and every zone a
  * second time.  The slab and bucket zones are drained once more at the
  * end.
  */
 void
 uma_reclaim(void)
 {
 #ifdef UMA_DEBUG
 	printf("UMA: vm asked us to release pages!\n");
 #endif
 	sx_xlock(&uma_drain_lock);
 	bucket_enable();
 	zone_foreach(zone_drain);
 	if (vm_page_count_min()) {
 		cache_drain_safe(NULL);
 		zone_foreach(zone_drain);
 	}
 	/*
 	 * Some slabs may have been freed, but the slab zones are visited
 	 * early in the pass above, so we visit them again here to free
 	 * pages that became empty once the other zones were drained.  We
 	 * have to do the same for buckets.
 	 */
 	zone_drain(slabzone);
 	zone_drain(slabrefzone);
 	bucket_zone_drain();
 	sx_xunlock(&uma_drain_lock);
 }
 
 /*
  * See uma.h
  *
  * Return the zone's UMA_ZFLAG_FULL bit (non-zero when set), sampled while
  * holding the zone lock.
  */
 int
 uma_zone_exhausted(uma_zone_t zone)
 {
 	int exhausted;
 
 	ZONE_LOCK(zone);
 	exhausted = zone->uz_flags & UMA_ZFLAG_FULL;
 	ZONE_UNLOCK(zone);
 
 	return (exhausted);
 }
 
 /*
  * Return the zone's UMA_ZFLAG_FULL bit (non-zero when set) without taking
  * the zone lock.
  */
 int
 uma_zone_exhausted_nolock(uma_zone_t zone)
 {
 	int exhausted;
 
 	exhausted = zone->uz_flags & UMA_ZFLAG_FULL;
 	return (exhausted);
 }
 
 /*
  * Allocate "size" bytes with page_alloc() and describe the allocation with
  * a slab header taken from slabzone, marked UMA_SLAB_MALLOC.
  *
  * Returns the allocated memory, or NULL if either the slab header or the
  * memory could not be allocated.
  */
 void *
 uma_large_malloc(vm_size_t size, int wait)
 {
 	uma_slab_t hdr;
 	void *addr;
 	uint8_t pflags;
 
 	hdr = zone_alloc_item(slabzone, NULL, wait);
 	if (hdr == NULL)
 		return (NULL);
 
 	addr = page_alloc(NULL, size, &pflags, wait);
 	if (addr == NULL) {
 		/* No memory: give the unused slab header back. */
 		zone_free_item(slabzone, hdr, NULL, SKIP_NONE);
 		return (NULL);
 	}
 
 	vsetslab((vm_offset_t)addr, hdr);
 	hdr->us_data = addr;
 	hdr->us_flags = pflags | UMA_SLAB_MALLOC;
 	hdr->us_size = size;
 
 	return (addr);
 }
 
 /*
  * Release the memory described by "slab" with page_free() and then return
  * the slab header itself to slabzone.
  */
 void
 uma_large_free(uma_slab_t slab)
 {
 
 	/* The slab fields must be read before the header is freed. */
 	page_free(slab->us_data, slab->us_size, slab->us_flags);
 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 }
 
 /*
  * Zero an item of "zone".  For UMA_ZONE_PCPU zones the copy for each of the
  * mp_ncpus CPUs is zeroed; otherwise uz_size bytes at "item" are zeroed.
  */
 static void
 uma_zero_item(void *item, uma_zone_t zone)
 {
 
 	if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) {
 		bzero(item, zone->uz_size);
 		return;
 	}
 
 	for (int cpu = 0; cpu < mp_ncpus; cpu++)
 		bzero(zpcpu_get_cpu(item, cpu), zone->uz_size);
 }
 
 /*
  * Print every zone by applying uma_print_zone() through zone_foreach().
  */
 void
 uma_print_stats(void)
 {
 	zone_foreach(uma_print_zone);
 }
 
 /*
  * Print one line describing a slab: its keg, data pointer and free count.
  */
 static void
 slab_print(uma_slab_t slab)
 {
 	printf("slab: keg %p, data %p, freecount %d\n",
 		slab->us_keg, slab->us_data, slab->us_freecount);
 }
 
 /*
  * Print the alloc and free bucket pointers of a per-CPU cache together with
  * each bucket's item count (0 when the bucket pointer is NULL).
  */
 static void
 cache_print(uma_cache_t cache)
 {
 	uma_bucket_t allocb, freeb;
 	int nalloc, nfree;
 
 	allocb = cache->uc_allocbucket;
 	freeb = cache->uc_freebucket;
 	nalloc = (allocb != NULL) ? allocb->ub_cnt : 0;
 	nfree = (freeb != NULL) ? freeb->ub_cnt : 0;
 
 	printf("alloc: %p(%d), free: %p(%d)\n", allocb, nalloc, freeb, nfree);
 }
 
 /*
  * Print a summary line for a keg followed by every slab on its partial,
  * free and full slab lists.
  */
 static void
 uma_print_keg(uma_keg_t keg)
 {
 	uma_slab_t slab;
 
 	/*
 	 * "out" is printed as uk_ipers * uk_pages - uk_free and "limit" as
 	 * (uk_maxpages / uk_ppera) * uk_ipers.
 	 */
 	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
 	    "out %d free %d limit %d\n",
 	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
 	    keg->uk_ipers, keg->uk_ppera,
 	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
 	    (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
 	printf("Part slabs:\n");
 	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
 		slab_print(slab);
 	printf("Free slabs:\n");
 	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
 		slab_print(slab);
 	printf("Full slabs:\n");
 	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
 		slab_print(slab);
 }
 
 /*
  * Print a zone: its summary line, each keg linked to it, and the cache of
  * every CPU visited by CPU_FOREACH().
  */
 void
 uma_print_zone(uma_zone_t zone)
 {
 	uma_klink_t klink;
 	int cpu;
 
 	printf("zone: %s(%p) size %d flags %#x\n",
 	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
 
 	LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
 		uma_print_keg(klink->kl_keg);
 	}
 
 	CPU_FOREACH(cpu) {
 		printf("CPU %d Cache:\n", cpu);
 		cache_print(&zone->uz_cpu[cpu]);
 	}
 }
 
 #ifdef DDB
 /*
  * Sum a zone's statistics together with those of its per-CPU caches.  Every
  * output pointer is optional: a statistic is stored only when its pointer
  * is non-NULL.
  *
  * Note: the zone statistics themselves are not updated, as the per-CPU
  * cache statistics can't be safely cleared from here.
  *
  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
  * safe from off-CPU; the caches should be modified to track this
  * information directly so that we don't have to.
  */
 static void
 uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
     uint64_t *freesp, uint64_t *sleepsp)
 {
 	uma_cache_t pcpu;
 	uint64_t nallocs, nfrees;
 	int cpu, nfree;
 
 	/* Start from the zone-level counters and add each CPU's share. */
 	nallocs = z->uz_allocs;
 	nfrees = z->uz_frees;
 	nfree = 0;
 	CPU_FOREACH(cpu) {
 		pcpu = &z->uz_cpu[cpu];
 		if (pcpu->uc_allocbucket != NULL)
 			nfree += pcpu->uc_allocbucket->ub_cnt;
 		if (pcpu->uc_freebucket != NULL)
 			nfree += pcpu->uc_freebucket->ub_cnt;
 		nallocs += pcpu->uc_allocs;
 		nfrees += pcpu->uc_frees;
 	}
 
 	if (cachefreep != NULL)
 		*cachefreep = nfree;
 	if (allocsp != NULL)
 		*allocsp = nallocs;
 	if (freesp != NULL)
 		*freesp = nfrees;
 	/* Sleeps are only counted at the zone level. */
 	if (sleepsp != NULL)
 		*sleepsp = z->uz_sleeps;
 }
 #endif /* DDB */
 
 /*
  * Sysctl handler reporting the number of zones, counted by walking every
  * keg's zone list under the uma_rwlock read lock.
  */
 static int
 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
 {
 	uma_keg_t keg;
 	uma_zone_t zone;
 	int nzones;
 
 	nzones = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
 		LIST_FOREACH(zone, &keg->uk_zones, uz_link) {
 			nzones++;
 		}
 	}
 	rw_runlock(&uma_rwlock);
 
 	return (sysctl_handle_int(oidp, &nzones, 0, req));
 }
 
 /*
  * Sysctl handler that streams UMA statistics as binary records through an
  * sbuf: one uma_stream_header, then for every zone a uma_type_header
  * followed by (mp_maxid + 1) uma_percpu_stat records.
  *
  * Returns the error from sysctl_wire_old_buffer() or sbuf_finish().
  */
 static int
 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct uma_stream_header ush;
 	struct uma_type_header uth;
 	struct uma_percpu_stat ups;
 	uma_bucket_t bucket;
 	struct sbuf sbuf;
 	uma_cache_t cache;
 	uma_klink_t kl;
 	uma_keg_t kz;
 	uma_zone_t z;
 	uma_keg_t k;
 	int count, error, i;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
 
 	/*
 	 * Count the zones for the stream header.  uma_rwlock stays
 	 * read-locked until the whole stream has been generated.
 	 */
 	count = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
 			count++;
 	}
 
 	/*
 	 * Insert stream header.
 	 */
 	bzero(&ush, sizeof(ush));
 	ush.ush_version = UMA_STREAM_VERSION;
 	ush.ush_maxcpus = (mp_maxid + 1);
 	ush.ush_count = count;
 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
 
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			bzero(&uth, sizeof(uth));
 			ZONE_LOCK(z);
 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
 			uth.uth_align = kz->uk_align;
 			uth.uth_size = kz->uk_size;
 			uth.uth_rsize = kz->uk_rsize;
 			/*
 			 * Page and free counts are summed over all of the
 			 * zone's kegs; uth_limit is assigned rather than
 			 * accumulated, so it reflects the last keg visited.
 			 */
 			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
 				k = kl->kl_keg;
 				uth.uth_maxpages += k->uk_maxpages;
 				uth.uth_pages += k->uk_pages;
 				uth.uth_keg_free += k->uk_free;
 				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
 				    * k->uk_ipers;
 			}
 
 			/*
 			 * A zone is secondary if it is not the first entry
 			 * on the keg's zone list.
 			 */
 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
 			    (LIST_FIRST(&kz->uk_zones) != z))
 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
 
 			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
 				uth.uth_zone_free += bucket->ub_cnt;
 			uth.uth_allocs = z->uz_allocs;
 			uth.uth_frees = z->uz_frees;
 			uth.uth_fails = z->uz_fails;
 			uth.uth_sleeps = z->uz_sleeps;
 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
 			/*
 			 * While it is not normally safe to access the cache
 			 * bucket pointers while not on the CPU that owns the
 			 * cache, we only allow the pointers to be exchanged
 			 * without the zone lock held, not invalidated, so
 			 * accept the possible race associated with bucket
 			 * exchange during monitoring.
 			 */
 			for (i = 0; i < (mp_maxid + 1); i++) {
 				bzero(&ups, sizeof(ups));
 				/*
 				 * Internal kegs and absent CPUs still emit a
 				 * record, but it is left zeroed.
 				 */
 				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
 					goto skip;
 				if (CPU_ABSENT(i))
 					goto skip;
 				cache = &z->uz_cpu[i];
 				if (cache->uc_allocbucket != NULL)
 					ups.ups_cache_free +=
 					    cache->uc_allocbucket->ub_cnt;
 				if (cache->uc_freebucket != NULL)
 					ups.ups_cache_free +=
 					    cache->uc_freebucket->ub_cnt;
 				ups.ups_allocs = cache->uc_allocs;
 				ups.ups_frees = cache->uc_frees;
 skip:
 				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
 			}
 			ZONE_UNLOCK(z);
 		}
 	}
 	rw_runlock(&uma_rwlock);
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 int
 sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone = *(uma_zone_t *)arg1;
 	int error, max, old;
 
 	old = max = uma_zone_get_max(zone);
 	error = sysctl_handle_int(oidp, &max, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	if (max < old)
 		return (EINVAL);
 
 	uma_zone_set_max(zone, max);
 
 	return (0);
 }
 
 /*
  * Sysctl handler reporting uma_zone_get_cur() for the zone that arg1
  * points at.
  */
 int
 sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
 {
 	int cur = uma_zone_get_cur(*(uma_zone_t *)arg1);
 
 	return (sysctl_handle_int(oidp, &cur, 0, req));
 }
 
 #ifdef DDB
 /*
  * DDB "show uma" command: print one row per zone with its item size, items
  * in use (allocs - frees), free items, total requests, sleeps and bucket
  * size.  Stops as soon as the pager is quit.
  */
 DB_SHOW_COMMAND(uma, db_show_uma)
 {
 	uint64_t allocs, frees, sleeps;
 	uma_bucket_t bucket;
 	uma_keg_t kz;
 	uma_zone_t z;
 	int cachefree;
 
 	db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
 	    "Free", "Requests", "Sleeps", "Bucket");
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			/*
 			 * Zones of internal kegs only use the zone-level
 			 * counters; all others also sum the per-CPU caches.
 			 */
 			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
 				allocs = z->uz_allocs;
 				frees = z->uz_frees;
 				sleeps = z->uz_sleeps;
 				cachefree = 0;
 			} else
 				uma_zone_sumstat(z, &cachefree, &allocs,
 				    &frees, &sleeps);
 			/*
 			 * The keg's free items are not added for a secondary
 			 * zone that is not first on the keg's zone list.
 			 */
 			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
 			    (LIST_FIRST(&kz->uk_zones) != z)))
 				cachefree += kz->uk_free;
 			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
 				cachefree += bucket->ub_cnt;
 			/* Every %ju argument is cast so it matches the format. */
 			db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
 			    z->uz_name, (uintmax_t)kz->uk_size,
 			    (intmax_t)(allocs - frees), cachefree,
 			    (uintmax_t)allocs, (uintmax_t)sleeps, z->uz_count);
 			if (db_pager_quit)
 				return;
 		}
 	}
 }
 
 /*
  * DDB "show umacache" command: print one row per zone on the
  * uma_cachezones list with its size, items in use (allocs - frees), free
  * items, total requests and bucket size.  Stops as soon as the pager is
  * quit.
  */
 DB_SHOW_COMMAND(umacache, db_show_umacache)
 {
 	uint64_t allocs, frees;
 	uma_bucket_t bucket;
 	uma_zone_t z;
 	int cachefree;
 
 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
 	    "Requests", "Bucket");
 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
 		/* The sleep count is not needed here, hence the NULL. */
 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
 		/* Add the items held in the zone's own bucket list. */
 		LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
 			cachefree += bucket->ub_cnt;
 		db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
 		    z->uz_name, (uintmax_t)z->uz_size,
 		    (intmax_t)(allocs - frees), cachefree,
 		    (uintmax_t)allocs, z->uz_count);
 		if (db_pager_quit)
 			return;
 	}
 }
 #endif
Index: projects/ifnet/sys
===================================================================
--- projects/ifnet/sys	(revision 281172)
+++ projects/ifnet/sys	(revision 281173)

Property changes on: projects/ifnet/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys:r281153-281172
Index: projects/ifnet/usr.bin/netstat/mroute6.c
===================================================================
--- projects/ifnet/usr.bin/netstat/mroute6.c	(revision 281172)
+++ projects/ifnet/usr.bin/netstat/mroute6.c	(revision 281173)
@@ -1,324 +1,279 @@
 /*-
  * Copyright (C) 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*-
  * Copyright (c) 1989 Stephen Deering
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)mroute.c	8.2 (Berkeley) 4/28/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #ifdef INET6
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/protosw.h>
 #include <sys/mbuf.h>
 #include <sys/time.h>
 
 #include <net/if.h>
-#include <net/if_var.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 
 #include <err.h>
-#include <nlist.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <libxo/xo.h>
 
 #define	KERNEL 1
 #include <netinet6/ip6_mroute.h>
 #undef KERNEL
 
 #include "netstat.h"
 
-/*
- * kvm(3) bindings for every needed symbol
- */
-static struct nlist mrl[] = {
-#define	N_MF6CTABLE	0
-	{ .n_name = "_mf6ctable" },
-#define	N_MIF6TABLE	1
-	{ .n_name = "_mif6table" },
-#define	N_MRT6STAT	2
-	{ .n_name = "_mrt6stat" },
-	{ .n_name = NULL },
-};
-
-
 #define	WID_ORG	(Wflag ? 39 : (numeric_addr ? 29 : 18)) /* width of origin column */
 #define	WID_GRP	(Wflag ? 18 : (numeric_addr ? 16 : 18)) /* width of group column */
 
 /*
  * Print the IPv6 multicast interface table followed by the IPv6 multicast
  * forwarding cache.  numeric_addr is forced to 1 while printing and is
  * restored before the normal return.
  */
 void
 mroute6pr()
 {
 	struct mf6c *mf6ctable[MF6CTBLSIZ], *mfcp;
-	struct mif6 mif6table[MAXMIFS];
+	struct mif6_sctl mif6table[MAXMIFS];
 	struct mf6c mfc;
 	struct rtdetq rte, *rtep;
-	struct mif6 *mifp;
-	u_long mfcaddr, mifaddr;
+	struct mif6_sctl *mifp;
 	mifi_t mifi;
 	int i;
 	int banner_printed;
 	int saved_numeric_addr;
 	mifi_t maxmif = 0;
 	long int waitings;
 	size_t len;
 
-	kresolve_list(mrl);
-	mfcaddr = mrl[N_MF6CTABLE].n_value;
-	mifaddr = mrl[N_MIF6TABLE].n_value;
+	if (live == 0)
+		return;
 
-	if (mfcaddr == 0 || mifaddr == 0) {
-		fprintf(stderr, "No IPv6 MROUTING kernel support.\n");
+	len = sizeof(mif6table);
+	if (sysctlbyname("net.inet6.ip6.mif6table", mif6table, &len, NULL, 0) <
+	    0) {
+		xo_warn("sysctl: net.inet6.ip6.mif6table");
 		return;
 	}
 
-	len = sizeof(mif6table);
-	if (live) {
-		if (sysctlbyname("net.inet6.ip6.mif6table", mif6table, &len,
-		    NULL, 0) < 0) {
-			xo_warn("sysctl: net.inet6.ip6.mif6table");
-			return;
-		}
-	} else
-		kread(mifaddr, (char *)mif6table, sizeof(mif6table));
-
 	saved_numeric_addr = numeric_addr;
 	numeric_addr = 1;
 	banner_printed = 0;
 
 	for (mifi = 0, mifp = mif6table; mifi < MAXMIFS; ++mifi, ++mifp) {
-		struct ifnet ifnet;
 		char ifname[IFNAMSIZ];
 
-		if (mifp->m6_ifp == NULL)
+		if (mifp->m6_ifp == 0)
 			continue;
 
-		/* XXX KVM */
-		kread((u_long)mifp->m6_ifp, (char *)&ifnet, sizeof(ifnet));
-
 		maxmif = mifi;
 		if (!banner_printed) {
 			xo_open_list("multicast-interface");
 			xo_emit("\n{T:IPv6 Multicast Interface Table}\n"
 			    "{T: Mif   Rate   PhyIF   Pkts-In   Pkts-Out}\n");
 			banner_printed = 1;
 		}
 
 		xo_open_instance("multicast-interface");
 		xo_emit("  {:mif/%2u}   {:rate-limit/%4d}",
 		    mifi, mifp->m6_rate_limit);
 		xo_emit("   {:ifname/%5s}", (mifp->m6_flags & MIFF_REGISTER) ?
-		    "reg0" : if_indextoname(ifnet.if_index, ifname));
+		    "reg0" : if_indextoname(mifp->m6_ifp, ifname));
 
 		xo_emit(" {:received-packets/%9ju}  {:sent-packets/%9ju}\n",
 		    (uintmax_t)mifp->m6_pkt_in,
 		    (uintmax_t)mifp->m6_pkt_out);
 		xo_close_instance("multicast-interface");
 	}
 	/* Close the list that was opened together with the banner. */
 	if (banner_printed)
 		xo_close_list("multicast-interface");
 	else
 		xo_emit("\n{T:IPv6 Multicast Interface Table is empty}\n");
 
 	len = sizeof(mf6ctable);
-	if (live) {
-		if (sysctlbyname("net.inet6.ip6.mf6ctable", mf6ctable, &len,
-		    NULL, 0) < 0) {
-			xo_warn("sysctl: net.inet6.ip6.mf6ctable");
-			return;
-		}
-	} else
-		kread(mfcaddr, (char *)mf6ctable, sizeof(mf6ctable));
+	if (sysctlbyname("net.inet6.ip6.mf6ctable", mf6ctable, &len, NULL, 0) <
+	    0) {
+		xo_warn("sysctl: net.inet6.ip6.mf6ctable");
+		return;
+	}
 
 	banner_printed = 0;
 
 	/* Walk every hash chain of the forwarding cache table. */
 	for (i = 0; i < MF6CTBLSIZ; ++i) {
 		mfcp = mf6ctable[i];
 		while(mfcp) {
 			kread((u_long)mfcp, (char *)&mfc, sizeof(mfc));
 			if (!banner_printed) {
 				xo_open_list("multicast-forwarding-cache");
 				xo_emit("\n"
 				    "{T:IPv6 Multicast Forwarding Cache}\n");
 				xo_emit(" {T:%-*.*s} {T:%-*.*s} {T:%s}",
 				    WID_ORG, WID_ORG, "Origin",
 				    WID_GRP, WID_GRP, "Group",
 				    "  Packets Waits In-Mif  Out-Mifs\n");
 				banner_printed = 1;
 			}
 
 			xo_open_instance("multicast-forwarding-cache");
 
 			xo_emit(" {:origin/%-*.*s}", WID_ORG, WID_ORG,
 			    routename6(&mfc.mf6c_origin));
 			xo_emit(" {:group/%-*.*s}", WID_GRP, WID_GRP,
 			    routename6(&mfc.mf6c_mcastgrp));
 			xo_emit(" {:total-packets/%9ju}",
 			    (uintmax_t)mfc.mf6c_pkt_cnt);
 
 			/* Count the entries on this cache entry's stall list. */
 			for (waitings = 0, rtep = mfc.mf6c_stall; rtep; ) {
 				waitings++;
 				/* XXX KVM */
 				kread((u_long)rtep, (char *)&rte, sizeof(rte));
 				rtep = rte.next;
 			}
 			xo_emit("   {:waitings/%3ld}", waitings);
 
 			if (mfc.mf6c_parent == MF6C_INCOMPLETE_PARENT)
 				xo_emit(" ---   ");
 			else
 				xo_emit("  {:parent/%3d}   ", mfc.mf6c_parent);
 			xo_open_list("mif");
 			for (mifi = 0; mifi <= maxmif; mifi++) {
 				if (IF_ISSET(mifi, &mfc.mf6c_ifset))
 					xo_emit(" {l:%u}", mifi);
 			}
 			xo_close_list("mif");
 			xo_emit("\n");
 
 			mfcp = mfc.mf6c_next;
 			xo_close_instance("multicast-forwarding-cache");
 		}
 	}
 	if (banner_printed)
 		xo_close_list("multicast-forwarding-cache");
 	else
 		xo_emit("\n{T:IPv6 Multicast Forwarding Table is empty}\n");
 
 	xo_emit("\n");
 	numeric_addr = saved_numeric_addr;
 }
 
 /*
  * Fetch the IPv6 multicast routing statistics through the
  * net.inet6.ip6.mrt6stat sysctl and emit them inside a
  * "multicast-statistics" libxo container.
  */
 void
 mrt6_stats()
 {
 	struct mrt6stat mrtstat;
-	u_long mstaddr;
 	size_t len = sizeof mrtstat;
 
-	kresolve_list(mrl);
-	mstaddr = mrl[N_MRT6STAT].n_value;
-
-	if (mstaddr == 0) {
-		fprintf(stderr, "No IPv6 MROUTING kernel support.\n");
+	if (sysctlbyname("net.inet6.ip6.mrt6stat", &mrtstat, &len, NULL, 0) <
+	    0) {
+		xo_warn("sysctl: net.inet6.ip6.mrt6stat");
 		return;
 	}
-
-	if (live) {
-		if (sysctlbyname("net.inet6.ip6.mrt6stat", &mrtstat, &len,
-		    NULL, 0) < 0) {
-			xo_warn("sysctl: net.inet6.ip6.mrt6stat");
-			return;
-		}
-	} else
-		kread(mstaddr, (char *)&mrtstat, sizeof(mrtstat));
 
 	xo_open_container("multicast-statistics");
 	xo_emit("{T:IPv6 multicast forwarding}:\n");
 
 	/*
 	 * p() and p2() emit one counter each; a zero counter is skipped when
 	 * sflag is greater than 1.  p() takes its suffix from plural(), p2()
 	 * from plurales().
 	 */
 #define	p(f, m) if (mrtstat.f || sflag <= 1) \
 	xo_emit(m, (uintmax_t)mrtstat.f, plural(mrtstat.f))
 #define	p2(f, m) if (mrtstat.f || sflag <= 1) \
 	xo_emit(m, (uintmax_t)mrtstat.f, plurales(mrtstat.f))
 
 	p(mrt6s_mfc_lookups, "\t{:cache-lookups/%ju} "
 	    "{N:/multicast forwarding cache lookup%s}\n");
 	p2(mrt6s_mfc_misses, "\t{:cache-misses/%ju} "
 	    "{N:/multicast forwarding cache miss%s}\n");
 	p(mrt6s_upcalls, "\t{:upcalls/%ju} "
 	    "{N:/upcall%s to multicast routing daemon}\n");
 	p(mrt6s_upq_ovflw, "\t{:upcall-overflows/%ju} "
 	    "{N:/upcall queue overflow%s}\n");
 	p(mrt6s_upq_sockfull, "\t{:upcalls-dropped-full-buffer/%ju} "
 	    "{N:/upcall%s dropped due to full socket buffer}\n");
 	p(mrt6s_cache_cleanups, "\t{:cache-cleanups/%ju} "
 	    "{N:/cache cleanup%s}\n");
 	p(mrt6s_no_route, "\t{:dropped-no-origin/%ju} "
 	    "{N:/datagram%s with no route for origin}\n");
 	p(mrt6s_bad_tunnel, "\t{:dropped-bad-tunnel/%ju} "
 	    "{N:/datagram%s arrived with bad tunneling}\n");
 	p(mrt6s_cant_tunnel, "\t{:dropped-could-not-tunnel/%ju} "
 	    "{N:/datagram%s could not be tunneled}\n");
 	p(mrt6s_wrong_if, "\t{:dropped-wrong-incoming-interface/%ju} "
 	    "{N:/datagram%s arrived on wrong interface}\n");
 	p(mrt6s_drop_sel, "\t{:dropped-selectively/%ju} "
 	    "{N:/datagram%s selectively dropped}\n");
 	p(mrt6s_q_overflow, "\t{:dropped-queue-overflow/%ju} "
 	    "{N:/datagram%s dropped due to queue overflow}\n");
 	p(mrt6s_pkt2large, "\t{:dropped-too-large/%ju} "
 	    "{N:/datagram%s dropped for being too large}\n");
 
 #undef	p2
 #undef	p
 	xo_close_container("multicast-statistics");
 }
 #endif /*INET6*/
Index: projects/ifnet/usr.bin/netstat/netstat.1
===================================================================
--- projects/ifnet/usr.bin/netstat/netstat.1	(revision 281172)
+++ projects/ifnet/usr.bin/netstat/netstat.1	(revision 281173)
@@ -1,822 +1,820 @@
 .\" Copyright (c) 1983, 1990, 1992, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 4. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)netstat.1	8.8 (Berkeley) 4/18/94
 .\" $FreeBSD$
 .\"
-.Dd February 21, 2015
+.Dd April 7, 2015
 .Dt NETSTAT 1
 .Os
 .Sh NAME
 .Nm netstat
 .Nd show network status and statistics
 .Sh SYNOPSIS
 .Bk -words
 .Bl -tag -width "netstat"
 .It Nm
 .Op Fl -libxo
 .Op Fl 46AaLnRSTWx
 .Op Fl f Ar protocol_family | Fl p Ar protocol
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl i | I Ar interface
 .Op Fl -libxo
 .Op Fl 46abdhnW
 .Op Fl f Ar address_family
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl w Ar wait
 .Op Fl -libxo
 .Op Fl I Ar interface
 .Op Fl 46d
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Op Fl q Ar howmany
 .It Nm Fl s
 .Op Fl -libxo
 .Op Fl 46sz
 .Op Fl f Ar protocol_family | Fl p Ar protocol
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl i | I Ar interface Fl s
 .Op Fl -libxo
 .Op Fl 46s
 .Op Fl f Ar protocol_family | Fl p Ar protocol
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl m
 .Op Fl -libxo
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl B
 .Op Fl -libxo
 .Op Fl z
 .Op Fl I Ar interface
 .It Nm Fl r
 .Op Fl -libxo
 .Op Fl 46AnW
 .Op Fl F Ar fibnum
 .Op Fl f Ar address_family
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl rs
 .Op Fl -libxo
 .Op Fl s
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl g
 .Op Fl -libxo
 .Op Fl 46W
 .Op Fl f Ar address_family
-.Op Fl M Ar core
-.Op Fl N Ar system
 .It Nm Fl gs
 .Op Fl -libxo
 .Op Fl 46s
 .Op Fl f Ar address_family
 .Op Fl M Ar core
 .Op Fl N Ar system
 .It Nm Fl Q
 .Op Fl -libxo
 .El
 .Ek
 .Sh DESCRIPTION
 The
 .Nm
 command symbolically displays the contents of various network-related
 data structures.
 There are a number of output formats,
 depending on the options for the information presented.
 .Bl -tag -width indent
 .It Xo
 .Bk -words
 .Nm
 .Op Fl 46AaLnRSTWx
 .Op Fl f Ar protocol_family | Fl p Ar protocol
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Display a list of active sockets
 (protocol control blocks)
 for each network protocol.
 .Pp
 The default display for active sockets shows the local
 and remote addresses, send and receive queue sizes (in bytes), protocol,
 and the internal state of the protocol.
 Address formats are of the form
 .Dq host.port
 or
 .Dq network.port
 if a socket's address specifies a network but no specific host address.
 When known, the host and network addresses are displayed symbolically
 according to the databases
 .Xr hosts 5
 and
 .Xr networks 5 ,
 respectively.
 If a symbolic name for an address is unknown, or if
 the
 .Fl n
 option is specified, the address is printed numerically, according
 to the address family.
 For more information regarding
 the Internet IPv4
 .Dq dot format ,
 refer to
 .Xr inet 3 .
 Unspecified,
 or
 .Dq wildcard ,
 addresses and ports appear as
 .Dq Li * .
 .Bl -tag -width indent
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl A
 Show the address of a protocol control block (PCB)
 associated with a socket; used for debugging.
 .It Fl a
 Show the state of all sockets;
 normally sockets used by server processes are not shown.
 .It Fl L
 Show the size of the various listen queues.
 The first count shows the number of unaccepted connections,
 the second count shows the amount of unaccepted incomplete connections,
 and the third count is the maximum number of queued connections.
 .It Fl n
 Do not resolve numeric addresses and port numbers to names.
 See
 .Sx GENERAL OPTIONS .
 .It Fl R
 Display the flowid and flowtype for each socket.
 flowid is a 32 bit hardware specific identifier for each flow.
 flowtype defines which protocol fields are hashed to produce the id.
 A complete listing is available in
 .Pa sys/mbuf.h
 under
 .Dv M_HASHTYPE_* .
 .It Fl S
 Show network addresses as numbers (as with
 .Fl n )
 but show ports symbolically.
 .It Fl T
 Display diagnostic information from the TCP control block.
 Fields include the number of packets requiring retransmission,
 received out-of-order, and those advertising a zero-sized window.
 .It Fl W
 Avoid truncating addresses even if this causes some fields to overflow.
 .It Fl x
 Display socket buffer and TCP timer statistics for each
 internet socket.
 .Pp
 The
 .Fl x
 flag causes
 .Nm
 to output all the information recorded about data
 stored in the socket buffers.
 The fields are:
 .Bl -column ".Li R-MBUF"
 .It Li R-MBUF Ta Number of mbufs in the receive queue.
 .It Li S-MBUF Ta Number of mbufs in the send queue.
 .It Li R-CLUS Ta Number of clusters, of any type, in the receive
 queue.
 .It Li S-CLUS Ta Number of clusters, of any type, in the send queue.
 .It Li R-HIWA Ta Receive buffer high water mark, in bytes.
 .It Li S-HIWA Ta Send buffer high water mark, in bytes.
 .It Li R-LOWA Ta Receive buffer low water mark, in bytes.
 .It Li S-LOWA Ta Send buffer low water mark, in bytes.
 .It Li R-BCNT Ta Receive buffer byte count.
 .It Li S-BCNT Ta Send buffer byte count.
 .It Li R-BMAX Ta Maximum bytes that can be used in the receive buffer.
 .It Li S-BMAX Ta Maximum bytes that can be used in the send buffer.
 .El
 .It Fl f Ar protocol_family
 Filter by
 .Ar protocol_family .
 See
 .Sx GENERAL OPTIONS .
 .It Fl p Ar protocol
 Filter by
 .Ar protocol .
 See
 .Sx GENERAL OPTIONS .
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl i | I Ar interface
 .Op Fl 46abdhnW
 .Op Fl f Ar address_family
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Show the state of all network interfaces or a single
 .Ar interface
 which have been auto-configured
 (interfaces statically configured into a system, but not
 located at boot time are not shown).
 An asterisk
 .Pq Dq Li *
 after an interface name indicates that the interface is
 .Dq down .
 .Pp
 When
 .Nm
 is invoked with
 .Fl i
 .Pq all interfaces
 or
 .Fl I Ar interface ,
 it provides a table of cumulative
 statistics regarding packets transferred, errors, and collisions.
 The network addresses of the interface
 and the maximum transmission unit
 .Pq Dq mtu
 are also displayed.
 .Bl -tag -width indent
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl a
 Multicast addresses currently in use are shown
 for each Ethernet interface and for each IP interface address.
 Multicast addresses are shown on separate lines following the interface
 address with which they are associated.
 .It Fl b
 Show the number of bytes in and out.
 .It Fl d
 Show the number of dropped packets.
 .It Fl h
 Print all counters in human readable form.
 .It Fl n
 Do not resolve numeric addresses and port numbers to names.
 See
 .Sx GENERAL OPTIONS .
 .It Fl W
 Avoid truncating interface names even if this causes some fields to overflow.
 See
 .Sx GENERAL OPTIONS .
 .It Fl f Ar protocol_family
 Filter by
 .Ar protocol_family .
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl w Ar wait
 .Op Fl I Ar interface
 .Op Fl 46d
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Op Fl q Ar howmany
 .Ek
 .Xc
 At intervals of
 .Ar wait
 seconds, display the information regarding packet traffic on all
 configured network interfaces or a single
 .Ar interface .
 .Pp
 When
 .Nm
 is invoked with the
 .Fl w
 option and a
 .Ar wait
 interval argument, it displays a running count of statistics related to
 network interfaces.
 An obsolescent version of this option used a numeric parameter
 with no option, and is currently supported for backward compatibility.
 By default, this display summarizes information for all interfaces.
 Information for a specific interface may be displayed with the
 .Fl I Ar interface
 option.
 .Bl -tag -width indent
 .It Fl I Ar interface
 Only show information regarding
 .Ar interface .
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl d
 Show the number of dropped packets.
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .It Fl q
 Exit after
 .Ar howmany
 outputs.
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl s
 .Op Fl 46sz
 .Op Fl f Ar protocol_family | Fl p Ar protocol
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Display system-wide statistics for each network protocol.
 .Bl -tag -width indent
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl s
 If
 .Fl s
 is repeated, counters with a value of zero are suppressed.
 .It Fl z
 Reset statistic counters after displaying them.
 .It Fl f Ar protocol_family
 Filter by
 .Ar protocol_family .
 See
 .Sx GENERAL OPTIONS .
 .It Fl p Ar protocol
 Filter by
 .Ar protocol .
 See
 .Sx GENERAL OPTIONS .
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl i | I Ar interface Fl s
 .Op Fl 46s
 .Op Fl f Ar protocol_family | Fl p Ar protocol
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Display per-interface statistics for each network protocol.
 .Bl -tag -width indent
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl s
 If
 .Fl s
 is repeated, counters with a value of zero are suppressed.
 .It Fl f Ar protocol_family
 Filter by
 .Ar protocol_family .
 See
 .Sx GENERAL OPTIONS .
 .It Fl p Ar protocol
 Filter by
 .Ar protocol .
 See
 .Sx GENERAL OPTIONS .
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl m
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Show statistics recorded by the memory management routines
 .Pq Xr mbuf 9 .
 The network manages a private pool of memory buffers.
 .Bl -tag -width indent
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl B
 .Op Fl z
 .Op Fl I Ar interface
 .Ek
 .Xc
 Show statistics about
 .Xr bpf 4
 peers.
 This includes information like
 how many packets have been matched, dropped and received by the
 bpf device, also information about current buffer sizes and device
 states.
 .Pp
 The
 .Xr bpf 4
 flags displayed when
 .Nm
 is invoked with the
 .Fl B
 option represent the underlying parameters of the bpf peer.
 Each flag is
 represented as a single lower case letter.
 The mapping between the letters and flags, in order of appearance, is:
 .Bl -column ".Li i"
 .It Li p Ta Set if listening promiscuously
 .It Li i Ta Dv BIOCIMMEDIATE No has been set on the device
 .It Li f Ta Dv BIOCGHDRCMPLT No status: source link addresses are being
 filled automatically
 .It Li s Ta Dv BIOCGSEESENT No status: see packets originating locally and
 remotely on the interface.
 .It Li a Ta Packet reception generates a signal
 .It Li l Ta Dv BIOCLOCK No status: descriptor has been locked
 .El
 .Pp
 For more information about these flags, please refer to
 .Xr bpf 4 .
 .Bl -tag -width indent
 .It Fl z
 Reset statistic counters after displaying them.
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl r
 .Op Fl 46AnW
 .Op Fl F Ar fibnum
 .Op Fl f Ar address_family
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Display the contents of routing tables.
 .Pp
 When
 .Nm
 is invoked with the routing table option
 .Fl r ,
 it lists the available routes and their status.
 Each route consists of a destination host or network, and a gateway to use
 in forwarding packets.
 The flags field shows a collection of information about the route stored
 as binary choices.
 The individual flags are discussed in more detail in the
 .Xr route 8
 and
 .Xr route 4
 manual pages.
 The mapping between letters and flags is:
 .Bl -column ".Li W" ".Dv RTF_WASCLONED"
 .It Li 1 Ta Dv RTF_PROTO1 Ta "Protocol specific routing flag #1"
 .It Li 2 Ta Dv RTF_PROTO2 Ta "Protocol specific routing flag #2"
 .It Li 3 Ta Dv RTF_PROTO3 Ta "Protocol specific routing flag #3"
 .It Li B Ta Dv RTF_BLACKHOLE Ta "Just discard pkts (during updates)"
 .It Li b Ta Dv RTF_BROADCAST Ta "The route represents a broadcast address"
 .It Li D Ta Dv RTF_DYNAMIC Ta "Created dynamically (by redirect)"
 .It Li G Ta Dv RTF_GATEWAY Ta "Destination requires forwarding by intermediary"
 .It Li H Ta Dv RTF_HOST Ta "Host entry (net otherwise)"
 .It Li L Ta Dv RTF_LLINFO Ta "Valid protocol to link address translation"
 .It Li M Ta Dv RTF_MODIFIED Ta "Modified dynamically (by redirect)"
 .It Li R Ta Dv RTF_REJECT Ta "Host or net unreachable"
 .It Li S Ta Dv RTF_STATIC Ta "Manually added"
 .It Li U Ta Dv RTF_UP Ta "Route usable"
 .It Li X Ta Dv RTF_XRESOLVE Ta "External daemon translates proto to link address"
 .El
 .Pp
 Direct routes are created for each
 interface attached to the local host;
 the gateway field for such entries shows the address of the outgoing interface.
 The refcnt field gives the
 current number of active uses of the route.
 Connection oriented
 protocols normally hold on to a single route for the duration of
 a connection while connectionless protocols obtain a route while sending
 to the same destination.
 The use field provides a count of the number of packets
 sent using that route.
 The interface entry indicates the network interface utilized for the route.
 .Bl -tag -width indent
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl A
 Show the contents of the internal Patricia tree
 structures; used for debugging.
 .It Fl n
 Do not resolve numeric addresses and port numbers to names.
 See
 .Sx GENERAL OPTIONS .
 .It Fl W
 Show the path MTU for each route, and print interface names with a
 wider field size.
 .It Fl F
 Display the routing table with the number
 .Ar fibnum .
 If the specified
 .Ar fibnum
 is -1 or
 .Fl F
 is not specified,
 the default routing table is displayed.
 .It Fl f
 Display the routing table for a particular
 .Ar address_family .
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl rs
 .Op Fl s
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Display routing statistics.
 .Bl -tag -width indent
 .It Fl s
 If
 .Fl s
 is repeated, counters with a value of zero are suppressed.
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl g
 .Op Fl 46W
 .Op Fl f Ar address_family
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Display the contents of the multicast virtual interface tables,
 and multicast forwarding caches.
 Entries in these tables will appear only when the kernel is
 actively forwarding multicast sessions.
 This option is applicable only to the
 .Cm inet
 and
 .Cm inet6
 address families.
 .Bl -tag -width indent
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl W
 Avoid truncating addresses even if this causes some fields to overflow.
 .It Fl f Ar protocol_family
 Filter by
 .Ar protocol_family .
 See
 .Sx GENERAL OPTIONS .
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl gs
 .Op Fl 46s
 .Op Fl f Ar address_family
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Ek
 .Xc
 Show multicast routing statistics.
 .Bl -tag -width indent
 .It Fl 4
 Show IPv4 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl 6
 Show IPv6 only.
 See
 .Sx GENERAL OPTIONS .
 .It Fl s
 If
 .Fl s
 is repeated, counters with a value of zero are suppressed.
 .It Fl f Ar protocol_family
 Filter by
 .Ar protocol_family .
 See
 .Sx GENERAL OPTIONS .
 .It Fl M
 Use an alternative core.
 See
 .Sx GENERAL OPTIONS .
 .It Fl N
 Use an alternative kernel image.
 See
 .Sx GENERAL OPTIONS .
 .El
 .It Xo
 .Bk -words
 .Nm
 .Fl Q
 .Ek
 .Xc
 Show
 .Xr netisr 9
 statistics.
 The flags field shows available ISR handlers:
 .Bl -column ".Li W" ".Dv NETISR_SNP_FLAGS_DRAINEDCPU"
 .It Li C Ta Dv NETISR_SNP_FLAGS_M2CPUID Ta "Able to map mbuf to cpu id"
 .It Li D Ta Dv NETISR_SNP_FLAGS_DRAINEDCPU  Ta "Has queue drain handler"
 .It Li F Ta Dv NETISR_SNP_FLAGS_M2FLOW Ta "Able to map mbuf to flow id"
 .El
 .El
 .Pp
 .Ss GENERAL OPTIONS
 Some options have the general meaning:
 .Bl -tag -width flag
 .It Fl 4
 Is shorthand for
 .Fl f
 .Ar inet
 .Pq Show only IPv4
 .It Fl 6
 Is shorthand for
 .Fl f
 .Ar inet6
 .Pq Show only IPv6
 .It Fl f Ar address_family , Fl p Ar protocol
 Limit display to those records
 of the specified
 .Ar address_family
 or a single
 .Ar protocol .
 The following address families and protocols are recognized:
 .Pp
 .Bl -tag -width ".Cm netgraph , ng Pq Dv AF_NETGRAPH" -compact
 .It Em Family
 .Em Protocols
 .It Cm inet Pq Dv AF_INET
 .Cm divert , icmp , igmp , ip , ipsec , pim , sctp , tcp , udp
 .It Cm inet6 Pq Dv AF_INET6
 .Cm icmp6 , ip6 , ipsec6 , rip6 , tcp , udp
 .It Cm pfkey Pq Dv PF_KEY
 .Cm pfkey
 .It Cm netgraph , ng Pq Dv AF_NETGRAPH
 .Cm ctrl , data
 .It Cm unix Pq Dv AF_UNIX
 .It Cm link Pq Dv AF_LINK
 .El
 .Pp
 The program will complain if
 .Ar protocol
 is unknown or if there is no statistics routine for it.
 .It Fl M
 Extract values associated with the name list from the specified core
 instead of the default
 .Pa /dev/kmem .
 .It Fl N
 Extract the name list from the specified system instead of the default,
 which is the kernel image the system has booted from.
 .It Fl n
 Show network addresses and ports as numbers.
 Normally
 .Nm
 attempts to resolve addresses and ports,
 and display them symbolically.
 .El
 .Sh SEE ALSO
 .Xr fstat 1 ,
 .Xr nfsstat 1 ,
 .Xr procstat 1 ,
 .Xr ps 1 ,
 .Xr sockstat 1 ,
 .Xr libxo 3 ,
 .Xr xo_parse_args 3 ,
 .Xr bpf 4 ,
 .Xr inet 4 ,
 .Xr route 4 ,
 .Xr unix 4 ,
 .Xr hosts 5 ,
 .Xr networks 5 ,
 .Xr protocols 5 ,
 .Xr services 5 ,
 .Xr iostat 8 ,
 .Xr route 8 ,
 .Xr trpt 8 ,
 .Xr vmstat 8 ,
 .Xr mbuf 9
 .Sh HISTORY
 The
 .Nm
 command appeared in
 .Bx 4.2 .
 .Pp
 IPv6 support was added by the WIDE/KAME project.
 .Sh BUGS
 The notion of errors is ill-defined.
Index: projects/ifnet/usr.sbin/acpi/acpiconf/acpiconf.c
===================================================================
--- projects/ifnet/usr.sbin/acpi/acpiconf/acpiconf.c	(revision 281172)
+++ projects/ifnet/usr.sbin/acpi/acpiconf/acpiconf.c	(revision 281173)
@@ -1,233 +1,247 @@
 /*-
  * Copyright (c) 1999 Mitsuru IWASAKI <iwasaki@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$Id: acpiconf.c,v 1.5 2000/08/08 14:12:19 iwasaki Exp $
  *	$FreeBSD$
  */
 
 #include <sys/param.h>
 
 #include <err.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <sys/ioctl.h>
 #include <sysexits.h>
 #include <unistd.h>
 
 #include <dev/acpica/acpiio.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 
 #define ACPIDEV		"/dev/acpi"
 
 static int	acpifd;
 
 static void
 acpi_init(void)	/* Open ACPIDEV and keep the descriptor in the file-scope acpifd. */
 {
 	acpifd = open(ACPIDEV, O_RDWR);	/* try read-write first */
 	if (acpifd == -1)
 		acpifd = open(ACPIDEV, O_RDONLY);	/* retry read-only */
 	if (acpifd == -1)
 		err(EX_OSFILE, ACPIDEV);	/* both opens failed: report and exit */
 }
 
 /* Issue ACPIIO_REQSLPSTATE on acpifd to request sleep state sleep_type. */
 static void
 acpi_sleep(int sleep_type)
 {
 	int ret;
   
 	/* Notify OS that we want to sleep.  devd(8) gets this notify. */
 	ret = ioctl(acpifd, ACPIIO_REQSLPSTATE, &sleep_type);
 	if (ret != 0)	/* any nonzero ioctl result is fatal: err() exits with EX_IOERR */
 		err(EX_IOERR, "request sleep type (%d) failed", sleep_type);
 }
 
 /* Ack or abort a pending suspend request by passing err_val to ACPIIO_ACKSLPSTATE. */
 static void
 acpi_sleep_ack(int err_val)
 {
 	int ret;
 
 	ret = ioctl(acpifd, ACPIIO_ACKSLPSTATE, &err_val);
 	if (ret != 0)	/* any nonzero ioctl result is fatal: err() exits with EX_IOERR */
 		err(EX_IOERR, "ack sleep type failed");
 }
 
 /* These should be ACPI defines, but they don't appear to be. */
 #define UNKNOWN_CAP 0xffffffff
 #define UNKNOWN_VOLTAGE 0xffffffff
 
 static int
 acpi_battinfo(int num)	/* Print design data, state and voltage of battery unit num. */
 {
 	union acpi_battery_ioctl_arg battio;
 	const char *pwr_units;
 	int hours, min, amp;
 	uint32_t volt;
 
 	if (num < 0 || num > 64)	/* NOTE(review): no call has failed here, so errx() looks intended */
 		err(EX_USAGE, "invalid battery %d", num);
 
 	/* Print battery design information. */
 	battio.unit = num;
 	if (ioctl(acpifd, ACPIIO_BATT_GET_BIF, &battio) == -1)
 		err(EX_IOERR, "get battery info (%d) failed", num);
 	amp = battio.bif.units;	/* nonzero selects "mA" as the unit label, zero "mW" */
 	pwr_units = amp ? "mA" : "mW";
 	if (battio.bif.dcap == UNKNOWN_CAP)
 		printf("Design capacity:\tunknown\n");
 	else
 		printf("Design capacity:\t%d %sh\n", battio.bif.dcap,
 		    pwr_units);
 	if (battio.bif.lfcap == UNKNOWN_CAP)
 		printf("Last full capacity:\tunknown\n");
 	else
 		printf("Last full capacity:\t%d %sh\n", battio.bif.lfcap,
 		    pwr_units);
 	printf("Technology:\t\t%s\n", battio.bif.btech == 0 ?
 	    "primary (non-rechargeable)" : "secondary (rechargeable)");
 	if (battio.bif.dvol == UNKNOWN_CAP)	/* UNKNOWN_CAP and UNKNOWN_VOLTAGE are the same value */
 		printf("Design voltage:\t\tunknown\n");
 	else
 		printf("Design voltage:\t\t%d mV\n", battio.bif.dvol);
 	printf("Capacity (warn):\t%d %sh\n", battio.bif.wcap, pwr_units);
 	printf("Capacity (low):\t\t%d %sh\n", battio.bif.lcap, pwr_units);
 	printf("Low/warn granularity:\t%d %sh\n", battio.bif.gra1, pwr_units);
 	printf("Warn/full granularity:\t%d %sh\n", battio.bif.gra2, pwr_units);
 	printf("Model number:\t\t%s\n", battio.bif.model);
 	printf("Serial number:\t\t%s\n", battio.bif.serial);
 	printf("Type:\t\t\t%s\n", battio.bif.type);
 	printf("OEM info:\t\t%s\n", battio.bif.oeminfo);
 
 	/* Fetch battery voltage information. */
 	volt = UNKNOWN_VOLTAGE;	/* stays unknown when the battery is not present */
 	battio.unit = num;	/* battio is a union; presumably the previous reply overwrote unit */
 	if (ioctl(acpifd, ACPIIO_BATT_GET_BST, &battio) == -1)
 		err(EX_IOERR, "get battery status (%d) failed", num);
 	if (battio.bst.state != ACPI_BATT_STAT_NOT_PRESENT)
 		volt = battio.bst.volt;
 
 	/* Print current battery state information. */
 	battio.unit = num;
 	if (ioctl(acpifd, ACPIIO_BATT_GET_BATTINFO, &battio) == -1)
 		err(EX_IOERR, "get battery user info (%d) failed", num);
 	if (battio.battinfo.state != ACPI_BATT_STAT_NOT_PRESENT) {
-		printf("State:\t\t\t");
-		if (battio.battinfo.state == 0)
-			printf("high ");
-		if (battio.battinfo.state & ACPI_BATT_STAT_CRITICAL)
-			printf("critical ");
-		if (battio.battinfo.state & ACPI_BATT_STAT_DISCHARG)
-			printf("discharging ");
-		if (battio.battinfo.state & ACPI_BATT_STAT_CHARGING)
-			printf("charging ");
-		printf("\n");
+		const char *state;
+		switch (battio.battinfo.state & ACPI_BATT_STAT_BST_MASK) {
+		case 0:
+			state = "high";
+			break;
+		case ACPI_BATT_STAT_DISCHARG:
+			state = "discharging";
+			break;
+		case ACPI_BATT_STAT_CHARGING:
+			state = "charging";
+			break;
+		case ACPI_BATT_STAT_CRITICAL:
+			state = "critical";
+			break;
+		case ACPI_BATT_STAT_DISCHARG | ACPI_BATT_STAT_CRITICAL:
+			state = "critical discharging";
+			break;
+		case ACPI_BATT_STAT_CHARGING | ACPI_BATT_STAT_CRITICAL:
+			state = "critical charging";
+			break;
+		default:
+			state = "invalid";
+		}
+		printf("State:\t\t\t%s\n", state);
 		if (battio.battinfo.cap == -1)	/* -1 is treated as "value not available" */
 			printf("Remaining capacity:\tunknown\n");
 		else
 			printf("Remaining capacity:\t%d%%\n",
 			    battio.battinfo.cap);
 		if (battio.battinfo.min == -1)
 			printf("Remaining time:\t\tunknown\n");
 		else {
 			hours = battio.battinfo.min / 60;	/* total minutes split into h:mm */
 			min = battio.battinfo.min % 60;
 			printf("Remaining time:\t\t%d:%02d\n", hours, min);
 		}
 		if (battio.battinfo.rate == -1)
 			printf("Present rate:\t\tunknown\n");
 		else if (amp && volt != UNKNOWN_VOLTAGE) {
 			printf("Present rate:\t\t%d mA (%d mW)\n",
 			    battio.battinfo.rate,
 			    battio.battinfo.rate * volt / 1000);	/* mA * mV / 1000 gives mW */
 		} else
 			printf("Present rate:\t\t%d %s\n",
 			    battio.battinfo.rate, pwr_units);
 	} else
 		printf("State:\t\t\tnot present\n");
 
 	/* Print battery voltage information. */
 	if (volt == UNKNOWN_VOLTAGE)
 		printf("Present voltage:\tunknown\n");
 	else
 		printf("Present voltage:\t%d mV\n", volt);	/* NOTE(review): volt is uint32_t, printed with %d */
 
 	return (0);	/* always 0; every ioctl failure above exits through err() */
 }
 
 static void
 usage(const char* prog)	/* Print the option summary for prog on stdout and exit. */
 {
 	printf("usage: %s [-h] [-i batt] [-k ack] [-s 1-4]\n", prog);
 	exit(0);	/* status 0 even when reached from an unrecognised option */
 }
 
 int
 main(int argc, char *argv[])	/* -i and -k act as they are parsed; -s is applied after the loop. */
 {
 	char	*prog;
 	int	c, sleep_type;	/* c is int: getopt(3) returns int, and -1 must survive the assignment */
 
 	prog = argv[0];
 	if (argc < 2)
 		usage(prog);
 		/* NOTREACHED */
 
 	sleep_type = -1;	/* -1 means that no -s option was given */
 	acpi_init();
 	while ((c = getopt(argc, argv, "hi:k:s:")) != -1) {
 		switch (c) {
 		case 'i':
 			acpi_battinfo(atoi(optarg));
 			break;
 		case 'k':
 			acpi_sleep_ack(atoi(optarg));
 			break;
 		case 's':
 			if (optarg[0] == 'S')	/* accept both "S3" and "3" */
 				sleep_type = optarg[1] - '0';
 			else
 				sleep_type = optarg[0] - '0';
 			if (sleep_type < 1 || sleep_type > 4)
 				errx(EX_USAGE, "invalid sleep type (%d)",
 				     sleep_type);
 			break;
 		case 'h':
 		default:
 			usage(prog);
 			/* NOTREACHED */
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 	if (sleep_type != -1)	/* issue the sleep request only once all options are parsed */
 		acpi_sleep(sleep_type);
 
 	close(acpifd);
 	exit (0);
 }
Index: projects/ifnet/usr.sbin/bsdinstall/scripts/zfsboot
===================================================================
--- projects/ifnet/usr.sbin/bsdinstall/scripts/zfsboot	(revision 281172)
+++ projects/ifnet/usr.sbin/bsdinstall/scripts/zfsboot	(revision 281173)
@@ -1,1582 +1,1585 @@
 #!/bin/sh
 #-
 # Copyright (c) 2013-2014 Allan Jude
 # Copyright (c) 2013-2015 Devin Teske
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 # $FreeBSD$
 #
 ############################################################ INCLUDES
 
 BSDCFG_SHARE="/usr/share/bsdconfig"
 . $BSDCFG_SHARE/common.subr || exit 1
 f_dprintf "%s: loading includes..." "$0"
 f_include $BSDCFG_SHARE/device.subr
 f_include $BSDCFG_SHARE/dialog.subr
 f_include $BSDCFG_SHARE/password/password.subr
 f_include $BSDCFG_SHARE/variable.subr
 
 ############################################################ CONFIGURATION
 
 #
 # Default name of the boot-pool
 #
 : ${ZFSBOOT_POOL_NAME:=zroot}
 
 #
 # Default options to use when creating zroot pool
 #
 : ${ZFSBOOT_POOL_CREATE_OPTIONS:=-O compress=lz4 -O atime=off}
 
 #
 # Default name for the boot environment parent dataset
 #
 : ${ZFSBOOT_BEROOT_NAME:=ROOT}
 
 #
 # Default name for the primary boot environment
 #
 : ${ZFSBOOT_BOOTFS_NAME:=default}
 
 #
 # Default Virtual Device (vdev) type to create
 #
 : ${ZFSBOOT_VDEV_TYPE:=stripe}
 
 #
 # Should we use sysctl(8) vfs.zfs.min_auto_ashift=12 to force 4K sectors?
 #
 : ${ZFSBOOT_FORCE_4K_SECTORS:=1}
 
 #
 # Should we use geli(8) to encrypt the drives?
 # NB: Automatically enables ZFSBOOT_BOOT_POOL
 #
 : ${ZFSBOOT_GELI_ENCRYPTION=}
 
 #
 # Default path to the geli(8) keyfile used in drive encryption
 #
 : ${ZFSBOOT_GELI_KEY_FILE:=/boot/encryption.key}
 
 #
 # Create a separate boot pool?
 # NB: Automatically set when using geli(8) or MBR
 #
 : ${ZFSBOOT_BOOT_POOL=}
 
 #
 # Options to use when creating separate boot pool (if any)
 #
 : ${ZFSBOOT_BOOT_POOL_CREATE_OPTIONS:=}
 
 #
 # Default name for boot pool when enabled (e.g., geli(8) or MBR)
 #
 : ${ZFSBOOT_BOOT_POOL_NAME:=bootpool}
 
 #
 # Default size for boot pool when enabled (e.g., geli(8) or MBR)
 #
 : ${ZFSBOOT_BOOT_POOL_SIZE:=2g}
 
 #
 # Default disks to use (always empty unless being scripted)
 #
 : ${ZFSBOOT_DISKS:=}
 
 #
 # Default partitioning scheme to use on disks
 #
 : ${ZFSBOOT_PARTITION_SCHEME:=GPT}
 
 #
 # How much swap to put on each block device in the boot zpool
 # NOTE: Value passed to gpart(8); which supports SI unit suffixes.
 #
 : ${ZFSBOOT_SWAP_SIZE:=2g}
 
 #
 # Should we use geli(8) to encrypt the swap?
 #
 : ${ZFSBOOT_SWAP_ENCRYPTION=}
 
 #
 # Should we use gmirror(8) to mirror the swap?
 #
 : ${ZFSBOOT_SWAP_MIRROR=}
 
 #
 # Default ZFS datasets for root zpool
 #
 # NOTE: Requires /tmp, /var/tmp, /$ZFSBOOT_BOOTFS_NAME/$ZFSBOOT_BOOTFS_NAME
 # NOTE: Anything after pound/hash character [#] is ignored as a comment.
 #
 f_isset ZFSBOOT_DATASETS || ZFSBOOT_DATASETS="
 	# DATASET	OPTIONS (comma or space separated; or both)
 
 	# Boot Environment [BE] root and default boot dataset
 	/$ZFSBOOT_BEROOT_NAME				mountpoint=none
 	/$ZFSBOOT_BEROOT_NAME/$ZFSBOOT_BOOTFS_NAME	mountpoint=/
 
 	# Compress /tmp, allow exec but not setuid
 	/tmp		mountpoint=/tmp,exec=on,setuid=off
 
 	# Don't mount /usr so that 'base' files go to the BEROOT
 	/usr		mountpoint=/usr,canmount=off
 
 	# Home directories separated so they are common to all BEs
 	/usr/home	# NB: /home is a symlink to /usr/home
 
 	# Ports tree
 	/usr/ports	setuid=off
 
 	# Source tree (compressed)
 	/usr/src
 
 	# Create /var and friends
 	/var		mountpoint=/var,canmount=off
 	/var/crash	exec=off,setuid=off
 	/var/log	exec=off,setuid=off
 	/var/mail	atime=on
 	/var/tmp	setuid=off
 " # END-QUOTE
 
 #
 # If interactive and the user has not explicitly chosen a vdev type or disks,
 # make the user confirm scripted/default choices when proceeding to install.
 #
 : ${ZFSBOOT_CONFIRM_LAYOUT:=1}
 
 ############################################################ GLOBALS
 
 #
 # Format of a line in printf(1) syntax to add to fstab(5)
 #
 FSTAB_FMT="%s\t\t%s\t%s\t%s\t\t%s\t%s\n"
 
 #
 # Command strings for various tasks
 #
 CHMOD_MODE='chmod %s "%s"'
 DD_WITH_OPTIONS='dd if="%s" of="%s" %s'
 ECHO_APPEND='echo "%s" >> "%s"'
 GELI_ATTACH='geli attach -j - -k "%s" "%s"'
 GELI_DETACH_F='geli detach -f "%s"'
 GELI_PASSWORD_INIT='geli init -b -B "%s" -e %s -J - -K "%s" -l 256 -s 4096 "%s"'
 GPART_ADD='gpart add -t %s "%s"'
 GPART_ADD_INDEX='gpart add -i %s -t %s "%s"'
 GPART_ADD_INDEX_WITH_SIZE='gpart add -i %s -t %s -s %s "%s"'
 GPART_ADD_LABEL='gpart add -l %s -t %s "%s"'
 GPART_ADD_LABEL_WITH_SIZE='gpart add -l %s -t %s -s %s "%s"'
 GPART_BOOTCODE='gpart bootcode -b "%s" "%s"'
 GPART_BOOTCODE_PART='gpart bootcode -b "%s" -p "%s" -i %s "%s"'
 GPART_CREATE='gpart create -s %s "%s"'
 GPART_DESTROY_F='gpart destroy -F "%s"'
 GPART_SET_ACTIVE='gpart set -a active -i %s "%s"'
 GRAID_DELETE='graid delete "%s"'
 LN_SF='ln -sf "%s" "%s"'
 MKDIR_P='mkdir -p "%s"'
 MOUNT_TYPE='mount -t %s "%s" "%s"'
 PRINTF_CONF="printf '%s=\"%%s\"\\\n' %s >> \"%s\""
 PRINTF_FSTAB='printf "$FSTAB_FMT" "%s" "%s" "%s" "%s" "%s" "%s" >> "%s"'
 SHELL_TRUNCATE=':> "%s"'
 SWAP_GMIRROR_LABEL='gmirror label swap %s'
 SYSCTL_ZFS_MIN_ASHIFT_12='sysctl vfs.zfs.min_auto_ashift=12'
 UMOUNT='umount "%s"'
 ZFS_CREATE_WITH_OPTIONS='zfs create %s "%s"'
 ZFS_SET='zfs set "%s" "%s"'
 ZFS_UNMOUNT='zfs unmount "%s"'
 ZPOOL_CREATE_WITH_OPTIONS='zpool create %s "%s" %s %s'
 ZPOOL_DESTROY='zpool destroy "%s"'
 ZPOOL_EXPORT='zpool export "%s"'
 ZPOOL_IMPORT_WITH_OPTIONS='zpool import %s "%s"'
 ZPOOL_LABELCLEAR_F='zpool labelclear -f "%s"'
 ZPOOL_SET='zpool set %s "%s"'
 
 #
 # Strings that should be moved to an i18n file and loaded with f_include_lang()
 #
 hline_alnum_arrows_punc_tab_enter="Use alnum, arrows, punctuation, TAB or ENTER"
 hline_arrows_space_tab_enter="Use arrows, SPACE, TAB or ENTER"
 hline_arrows_tab_enter="Press arrows, TAB or ENTER"
 msg_an_unknown_error_occurred="An unknown error occurred"
 msg_back="Back"
 msg_cancel="Cancel"
 msg_change_selection="Change Selection"
 msg_configure_options="Configure Options:"
 msg_detailed_disk_info="gpart(8) show %s:\n%s\n\ncamcontrol(8) inquiry %s:\n%s\n\n\ncamcontrol(8) identify %s:\n%s\n"
 msg_disk_info="Disk Info"
 msg_disk_info_help="Get detailed information on disk device(s)"
 msg_disk_singular="disk"
 msg_disk_plural="disks"
 msg_encrypt_disks="Encrypt Disks?"
 msg_encrypt_disks_help="Use geli(8) to encrypt all data partitions"
 msg_error="Error"
 msg_force_4k_sectors="Force 4K Sectors?"
 msg_force_4k_sectors_help="Use sysctl(8) vfs.zfs.min_auto_ashift=12 to force 4K sectors"
 msg_freebsd_installer="FreeBSD Installer"
 msg_geli_password="Enter a strong passphrase, used to protect your encryption keys. You will be required to enter this passphrase each time the system is booted"
 msg_geli_setup="Initializing encryption on selected disks,\n this will take several seconds per disk"
 msg_install="Install"
 msg_install_desc="Proceed with Installation"
 msg_install_help="Create ZFS boot pool with displayed options"
 msg_invalid_boot_pool_size="Invalid boot pool size \`%s'"
 msg_invalid_disk_argument="Invalid disk argument \`%s'"
 msg_invalid_index_argument="Invalid index argument \`%s'"
 msg_invalid_swap_size="Invalid swap size \`%s'"
 msg_invalid_virtual_device_type="Invalid Virtual Device type \`%s'"
 msg_last_chance_are_you_sure="Last Chance! Are you sure you want to destroy\nthe current contents of the following disks:\n\n   %s"
 msg_last_chance_are_you_sure_color='\\ZrLast Chance!\\ZR Are you \\Z1sure\\Zn you want to \\Zr\\Z1destroy\\Zn\nthe current contents of the following disks:\n\n   %s'
 msg_mirror_desc="Mirror - n-Way Mirroring"
 msg_mirror_help="[2+ Disks] Mirroring provides the best performance, but the least storage"
 msg_missing_disk_arguments="missing disk arguments"
 msg_missing_one_or_more_scripted_disks="Missing one or more scripted disks!"
 msg_no="NO"
 msg_no_disks_present_to_configure="No disk(s) present to configure"
 msg_no_disks_selected="No disks selected."
 msg_not_enough_disks_selected="Not enough disks selected. (%u < %u minimum)"
 msg_null_disk_argument="NULL disk argument"
 msg_null_index_argument="NULL index argument"
 msg_null_poolname="NULL poolname"
 msg_ok="OK"
 msg_partition_scheme="Partition Scheme"
 msg_partition_scheme_help="Toggle between GPT and MBR partitioning schemes"
 msg_please_enter_a_name_for_your_zpool="Please enter a name for your zpool:"
 msg_please_enter_amount_of_swap_space="Please enter amount of swap space (SI-Unit suffixes\nrecommended; e.g., \`2g' for 2 Gigabytes):"
 msg_please_select_one_or_more_disks="Please select one or more disks to create a zpool:"
 msg_pool_name="Pool Name"
 msg_pool_name_cannot_be_empty="Pool name cannot be empty."
 msg_pool_name_help="Customize the name of the zpool to be created (Required)"
 msg_pool_type_disks="Pool Type/Disks:"
 msg_pool_type_disks_help="Choose type of ZFS Virtual Device and disks to use (Required)"
 msg_processing_selection="Processing selection..."
 msg_raidz1_desc="RAID-Z1 - Single Redundant RAID"
 msg_raidz1_help="[3+ Disks] Withstand failure of 1 disk. Recommended for: 3, 5 or 9 disks"
 msg_raidz2_desc="RAID-Z2 - Double Redundant RAID"
 msg_raidz2_help="[4+ Disks] Withstand failure of 2 disks. Recommended for: 4, 6 or 10 disks"
 msg_raidz3_desc="RAID-Z3 - Triple Redundant RAID"
 msg_raidz3_help="[5+ Disks] Withstand failure of 3 disks. Recommended for: 5, 7 or 11 disks"
 msg_rescan_devices="Rescan Devices"
 msg_rescan_devices_help="Scan for device changes"
 msg_select="Select"
 msg_select_a_disk_device="Select a disk device"
 msg_select_virtual_device_type="Select Virtual Device type:"
 msg_stripe_desc="Stripe - No Redundancy"
 msg_stripe_help="[1+ Disks] Striping provides maximum storage but no redundancy"
 msg_swap_encrypt="Encrypt Swap?"
 msg_swap_encrypt_help="Encrypt swap partitions with temporary keys, discarded on reboot"
 msg_swap_mirror="Mirror Swap?"
 msg_swap_mirror_help="Mirror swap partitions for redundancy, breaks crash dumps"
 msg_swap_size="Swap Size"
 msg_swap_size_help="Customize how much swap space is allocated to each selected disk"
 msg_these_disks_are_too_small="These disks are too small given the amount of requested\nswap (%s) and/or geli(8) (%s) partitions, which would\ntake 50%% or more of each of the following selected disk\ndevices (not recommended):\n\n  %s\n\nRecommend changing partition size(s) and/or selecting a\ndifferent set of devices."
 msg_uefi_not_supported="The FreeBSD UEFI loader does not currently support booting root-on-ZFS. Your system will need to boot in legacy (CSM) mode.\nDo you want to continue?"
 msg_unable_to_get_disk_capacity="Unable to get disk capacity of \`%s'"
 msg_unsupported_partition_scheme="%s is an unsupported partition scheme"
 msg_user_cancelled="User Cancelled."
 msg_yes="YES"
 msg_zfs_configuration="ZFS Configuration"
 
 ############################################################ FUNCTIONS
 
 # dialog_menu_main
 #
 # Display the dialog(1)-based application main menu. The chosen tag is passed
 # to f_dialog_menutag_store and the exit status of $DIALOG is returned.
 dialog_menu_main()
 {
 	local title="$DIALOG_TITLE"
 	local btitle="$DIALOG_BACKTITLE"
 	local prompt="$msg_configure_options"
 	local force4k="$msg_no"
 	local usegeli="$msg_no"
 	local swapgeli="$msg_no"
 	local swapmirror="$msg_no"
 	[ "$ZFSBOOT_FORCE_4K_SECTORS" ] && force4k="$msg_yes" # any non-empty value counts as yes
 	[ "$ZFSBOOT_GELI_ENCRYPTION" ] && usegeli="$msg_yes"
 	[ "$ZFSBOOT_SWAP_ENCRYPTION" ] && swapgeli="$msg_yes"
 	[ "$ZFSBOOT_SWAP_MIRROR" ] && swapmirror="$msg_yes"
 	local disks n disks_grammar
 	f_count n $ZFSBOOT_DISKS # presumably stores the number of listed disks in n -- confirm
 	{ [ $n -eq 1 ] && disks_grammar=$msg_disk_singular; } ||
 		disks_grammar=$msg_disk_plural # grammar
 	local menu_list="
 		'>>> $msg_install'      '$msg_install_desc'
 		                        '$msg_install_help'
 		'T $msg_pool_type_disks'
 		                        '$ZFSBOOT_VDEV_TYPE: $n $disks_grammar'
 		                        '$msg_pool_type_disks_help'
 		'- $msg_rescan_devices' '*'
 		                        '$msg_rescan_devices_help'
 		'- $msg_disk_info'      '*'
 		                        '$msg_disk_info_help'
 		'N $msg_pool_name'      '$ZFSBOOT_POOL_NAME'
 		                        '$msg_pool_name_help'
 		'4 $msg_force_4k_sectors'
 		                        '$force4k'
 		                        '$msg_force_4k_sectors_help'
 		'E $msg_encrypt_disks'  '$usegeli'
 		                        '$msg_encrypt_disks_help'
 		'P $msg_partition_scheme'
 		                        '$ZFSBOOT_PARTITION_SCHEME'
 		                        '$msg_partition_scheme_help'
 		'S $msg_swap_size'      '$ZFSBOOT_SWAP_SIZE'
 		                        '$msg_swap_size_help'
 		'M $msg_swap_mirror'    '$swapmirror'
 		                        '$msg_swap_mirror_help'
 		'W $msg_swap_encrypt'   '$swapgeli'
 		                        '$msg_swap_encrypt_help'
 	" # END-QUOTE
 	local defaultitem= # Calculated below
 	local hline="$hline_alnum_arrows_punc_tab_enter"
 
 	local height width rows # eval re-parses the quoted items of $menu_list into separate words
 	eval f_dialog_menu_with_help_size height width rows \
 		\"\$title\" \"\$btitle\" \"\$prompt\" \"\$hline\" $menu_list
 
 	# Obtain default-item from previously stored selection
 	f_dialog_default_fetch defaultitem
 
 	local menu_choice # receives stderr of $DIALOG; its stdout goes to the passthru fd
 	menu_choice=$( eval $DIALOG \
 		--title \"\$title\"              \
 		--backtitle \"\$btitle\"         \
 		--hline \"\$hline\"              \
 		--item-help                      \
 		--ok-label \"\$msg_select\"      \
 		--cancel-label \"\$msg_cancel\"  \
 		--default-item \"\$defaultitem\" \
 		--menu \"\$prompt\"              \
 		$height $width $rows             \
 		$menu_list                       \
 		2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD
 	)
 	local retval=$? # exit status of the command substitution above
 	f_dialog_data_sanitize menu_choice
 	f_dialog_menutag_store "$menu_choice"
 
 	# Only update default-item on success
 	[ $retval -eq $DIALOG_OK ] && f_dialog_default_store "$menu_choice"
 
 	return $retval
 }
 
 # dialog_last_chance $disks ...
 #
 # Show a yes/no confirmation listing the disks that are about to be
 # destroyed. The "no" answer is preselected, so the function returns error
 # status unless the user explicitly chooses "Yes". The return value is the
 # exit status of the dialog.
 #
 dialog_last_chance()
 {
 	local title="$DIALOG_TITLE"
 	local btitle="$DIALOG_BACKTITLE"
 	local hline="$hline_arrows_tab_enter"
 	local prompt # Calculated below
 
 	local height=8 width=50 prefix="   "
 	local plen=${#prefix} list= line=
 	local max_width=$(( $width - 3 - $plen ))
 
 	# Option spellings and message format differ between the two front-ends
 	local yes no defaultno extra_args format
 	case "$USE_XDIALOG" in
 	"")	# dialog(1)
 		yes=yes no=no defaultno=defaultno
 		extra_args="--colors --cr-wrap"
 		format="$msg_last_chance_are_you_sure_color"
 		;;
 	*)	# Xdialog(1)
 		yes=ok no=cancel defaultno=default-no
 		extra_args="--wrap --left"
 		format="$msg_last_chance_are_you_sure"
 		;;
 	esac
 
 	# Pack the disk names into lines no wider than $max_width
 	local disk line_width
 	for disk in $*; do
 		# An empty line is counted as if it already held the prefix
 		line_width=${#line}
 		[ "$line" ] || line_width=$plen
 		line_width=$(( $line_width + 1 + ${#disk} ))
 
 		# Flush the current line first if this disk would overflow it
 		if [ $line_width -gt $max_width ]; then
 			list="$list$line\n"
 			line="$prefix"
 			height=$(( $height + 1 ))
 		fi
 
 		line="$line $disk"
 	done
 
 	# Flush whatever remains beyond the bare prefix
 	if [ "${line#$prefix}" ]; then
 		list="$list$line"
 		height=$(( $height + 1 ))
 	fi
 
 	# Add height for Xdialog(1)
 	if [ "$USE_XDIALOG" ]; then
 		height=$(( $height + $height / 5 + 3 ))
 	fi
 
 	prompt=$( printf "$format" "$list" )
 	f_dprintf "%s: Last Chance!" "$0"
 	$DIALOG \
 		--title "$title" \
 		--backtitle "$btitle" \
 		--hline "$hline" \
 		--$defaultno \
 		--$yes-label "$msg_yes" \
 		--$no-label "$msg_no" \
 		$extra_args \
 		--yesno "$prompt" $height $width
 }
 
 # dialog_menu_layout
 #
 # Configure Virtual Device type and disks to use for the ZFS boot pool. User
 # must select enough disks to satisfy the chosen vdev type.
 #
 # Reads and updates $ZFSBOOT_VDEV_TYPE and $ZFSBOOT_DISKS. The menus are only
 # shown when running interactively with $ZFSBOOT_CONFIRM_LAYOUT set; otherwise
 # the scripted values are validated as-is. Returns $DIALOG_OK once enough
 # valid disks are selected, $FAILURE on validation failure, or the dialog
 # exit status if the user cancels the vdev type menu.
 #
 dialog_menu_layout()
 {
 	local funcname=dialog_menu_layout
 	local title="$DIALOG_TITLE"
 	local btitle="$DIALOG_BACKTITLE"
 	local vdev_prompt="$msg_select_virtual_device_type"
 	local disk_prompt="$msg_please_select_one_or_more_disks"
 	local vdev_menu_list="
 		'stripe' '$msg_stripe_desc' '$msg_stripe_help'
 		'mirror' '$msg_mirror_desc' '$msg_mirror_help'
 		'raidz1' '$msg_raidz1_desc' '$msg_raidz1_help'
 		'raidz2' '$msg_raidz2_desc' '$msg_raidz2_help'
 		'raidz3' '$msg_raidz3_desc' '$msg_raidz3_help'
 	" # END-QUOTE
 	local disk_check_list= # Calculated below
 	local vdev_hline="$hline_arrows_tab_enter"
 	local disk_hline="$hline_arrows_space_tab_enter"
 
 	# Warn the user if vdev type is not valid
 	case "$ZFSBOOT_VDEV_TYPE" in
 	stripe|mirror|raidz1|raidz2|raidz3) : known good ;;
 	*)
 		f_dprintf "%s: Invalid virtual device type \`%s'" \
 			  $funcname "$ZFSBOOT_VDEV_TYPE"
 		f_show_err "$msg_invalid_virtual_device_type" \
 			   "$ZFSBOOT_VDEV_TYPE"
 		f_interactive || return $FAILURE
 	esac
 
 	# Calculate size of vdev menu once only
 	local vheight vwidth vrows
 	eval f_dialog_menu_with_help_size vheight vwidth vrows \
 		\"\$title\" \"\$btitle\" \"\$vdev_prompt\" \"\$vdev_hline\" \
 		$vdev_menu_list
 
 	# Get a list of probed disk devices
 	local disks=
 	debug= f_device_find "" $DEVICE_TYPE_DISK disks
 
 	# Prune out mounted md(4) devices that may be part of the boot process
 	local disk name new_list=
 	for disk in $disks; do
 		debug= $disk get name name
 		case "$name" in
 		md[0-9]*) f_mounted -b "/dev/$name" && continue ;;
 		esac
 		new_list="$new_list $disk"
 	done
 	disks="${new_list# }"
 
 	# Debugging
 	if [ "$debug" ]; then
 		local disk_names=
 		for disk in $disks; do
 			debug= $disk get name name
 			disk_names="$disk_names $name"
 		done
 		f_dprintf "$funcname: disks=[%s]" "${disk_names# }"
 	fi
 
 	if [ ! "$disks" ]; then
 		f_dprintf "No disk(s) present to configure"
 		f_show_err "$msg_no_disks_present_to_configure"
 		return $FAILURE
 	fi
 
 	# Lets sort the disks array to be more user friendly
 	f_device_sort_by name disks disks
 
 	#
 	# Operate in a loop so we can (if interactive) repeat if not enough
 	# disks are selected to satisfy the chosen vdev type or user wants to
 	# back-up to the previous menu.
 	#
 	local vardisk ndisks onoff selections vdev_choice breakout device
 	local valid_disks all_valid want_disks desc height width rows
 	while :; do
 		#
 		# Confirm the vdev type that was selected
 		#
 		if f_interactive && [ "$ZFSBOOT_CONFIRM_LAYOUT" ]; then
 			vdev_choice=$( eval $DIALOG \
 				--title \"\$title\"              \
 				--backtitle \"\$btitle\"         \
 				--hline \"\$vdev_hline\"         \
 				--ok-label \"\$msg_ok\"          \
 				--cancel-label \"\$msg_cancel\"  \
 				--item-help                      \
 				--default-item \"\$ZFSBOOT_VDEV_TYPE\" \
 				--menu \"\$vdev_prompt\"         \
 				$vheight $vwidth $vrows          \
 				$vdev_menu_list                  \
 				2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD
 			) || return $?
 				# Exit if user pressed ESC or chose Cancel/No
 			f_dialog_data_sanitize vdev_choice
 
 			ZFSBOOT_VDEV_TYPE="$vdev_choice"
 			f_dprintf "$funcname: ZFSBOOT_VDEV_TYPE=[%s]" \
 			          "$ZFSBOOT_VDEV_TYPE"
 		fi
 
 		# Determine the number of disks needed for this vdev type
 		want_disks=0
 		case "$ZFSBOOT_VDEV_TYPE" in
 		stripe) want_disks=1 ;;
 		mirror) want_disks=2 ;;
 		raidz1) want_disks=3 ;;
 		raidz2) want_disks=4 ;;
 		raidz3) want_disks=5 ;;
 		esac
 
 		#
 		# Warn the user if any scripted disks are invalid
 		#
 		valid_disks= all_valid=${ZFSBOOT_DISKS:+1} # optimism
 		for disk in $ZFSBOOT_DISKS; do
 			if debug= f_device_find -1 \
 				$disk $DEVICE_TYPE_DISK device
 			then
 				valid_disks="$valid_disks $disk"
 				continue
 			fi
 			f_dprintf "$funcname: \`%s' is not a real disk" "$disk"
 			all_valid=
 		done
 		if [ ! "$all_valid" ]; then
 			if [ "$ZFSBOOT_DISKS" ]; then
 				f_show_err \
 				    "$msg_missing_one_or_more_scripted_disks"
 			else
 				f_dprintf "No disks selected."
 				f_interactive ||
 					f_show_err "$msg_no_disks_selected"
 			fi
 			f_interactive || return $FAILURE
 		fi
 		ZFSBOOT_DISKS="${valid_disks# }"
 
 		#
 		# Short-circuit if we're running non-interactively
 		#
 		if ! f_interactive || [ ! "$ZFSBOOT_CONFIRM_LAYOUT" ]; then
 			f_count ndisks $ZFSBOOT_DISKS
 			[ $ndisks -ge $want_disks ] && break # to success
 
 			# Not enough disks selected
 			f_dprintf "$funcname: %s: %s (%u < %u minimum)" \
 				  "$ZFSBOOT_VDEV_TYPE" \
 			          "Not enough disks selected." \
 				  $ndisks $want_disks
 			f_interactive || return $FAILURE
 			msg_yes="$msg_change_selection" msg_no="$msg_cancel" \
 				f_yesno "%s: $msg_not_enough_disks_selected" \
 				"$ZFSBOOT_VDEV_TYPE" $ndisks $want_disks ||
 				return $FAILURE
 		fi
 
 		#
 		# Confirm the disks that were selected
 		# Loop until the user cancels or selects enough disks
 		#
 		breakout=
 		while :; do
 			# Loop over list of available disks, resetting state
 			# NB: eval is required; a variable name built from an
 			# expansion is not parsed as an assignment by sh(1)
 			for disk in $disks; do
 				f_isset _${disk}_status &&
 					eval _${disk}_status=
 			done
 
 			# Loop over list of selected disks and create temporary
 			# locals to map statuses onto up-to-date list of disks
 			for disk in $ZFSBOOT_DISKS; do
 				debug= f_device_find -1 \
 					$disk $DEVICE_TYPE_DISK disk
 				f_isset _${disk}_status ||
 					local _${disk}_status
 				eval _${disk}_status=on
 			done
 
 			# Create the checklist menu of discovered disk devices
 			disk_check_list=
 			for disk in $disks; do
 				desc=
 				$disk get name name
 				$disk get desc desc
 				f_shell_escape "$desc" desc
 				f_getvar _${disk}_status:-off onoff
 				disk_check_list="$disk_check_list
 					$name '$desc' $onoff"
 			done
 
 			# Use the disk-specific prompt and hline declared above
 			eval f_dialog_checklist_size height width rows \
 				\"\$title\" \"\$btitle\" \"\$disk_prompt\" \
 				\"\$disk_hline\" $disk_check_list
 
 			selections=$( eval $DIALOG \
 				--title \"\$DIALOG_TITLE\"         \
 				--backtitle \"\$DIALOG_BACKTITLE\" \
 				--separate-output                  \
 				--hline \"\$disk_hline\"           \
 				--ok-label \"\$msg_ok\"            \
 				--cancel-label \"\$msg_back\"      \
 				--checklist \"\$disk_prompt\"      \
 				$height $width $rows               \
 				$disk_check_list                   \
 				2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD
 			) || break
 				# Loop if user pressed ESC or chose Cancel/No
 			f_dialog_data_sanitize selections
 
 			ZFSBOOT_DISKS="$selections"
 			f_dprintf "$funcname: ZFSBOOT_DISKS=[%s]" \
 			          "$ZFSBOOT_DISKS"
 
 			f_count ndisks $ZFSBOOT_DISKS
 			[ $ndisks -ge $want_disks ] &&
 				breakout=break && break
 
 			# Not enough disks selected
 			f_dprintf "$funcname: %s: %s (%u < %u minimum)" \
 				  "$ZFSBOOT_VDEV_TYPE" \
 			          "Not enough disks selected." \
 			          $ndisks $want_disks
 			msg_yes="$msg_change_selection" msg_no="$msg_cancel" \
 				f_yesno "%s: $msg_not_enough_disks_selected" \
 				"$ZFSBOOT_VDEV_TYPE" $ndisks $want_disks ||
 				break
 		done
 		[ "$breakout" = "break" ] && break
 		[ "$ZFSBOOT_CONFIRM_LAYOUT" ] || return $FAILURE
 	done
 
 	return $DIALOG_OK
 }
 
 # dialog_uefi_prompt
 #
 # Confirm that the user wants to continue with the installation on a BIOS
 # system when they have booted with UEFI. Displays $msg_uefi_not_supported in
 # a yes/no dialog and returns the exit status of that dialog.
 #
 dialog_uefi_prompt()
 {
 	local title="$DIALOG_TITLE"
 	local btitle="$DIALOG_BACKTITLE"
 	local prompt # Calculated below
 	local hline="$hline_arrows_tab_enter"
 
 	local height=8 width=50
 
 	# The message is the same for both front-ends; only the button-label
 	# option names and the wrapping flags differ
 	local yes no extra_args format="$msg_uefi_not_supported"
 	if [ "$USE_XDIALOG" ]; then
 		yes=ok no=cancel
 		extra_args="--wrap --left"
 	else
 		yes=yes no=no
 		extra_args="--cr-wrap"
 	fi
 
 	# Add height for Xdialog(1)
 	[ "$USE_XDIALOG" ] && height=$(( $height + $height / 5 + 3 ))
 
 	prompt=$( printf "$format" )
 	f_dprintf "%s: UEFI prompt" "$0"
 	$DIALOG \
 		--title "$title"        \
 		--backtitle "$btitle"   \
 		--hline "$hline"        \
 		--$yes-label "$msg_yes" \
 		--$no-label "$msg_no"   \
 		$extra_args             \
 		--yesno "$prompt" $height $width
 }
 
 # zfs_create_diskpart $disk $index
 #
 # For each block device to be used in the zpool, rather than just create the
 # zpool with the raw block devices (e.g., da0, da1, etc.) we create partitions
 # so we can have some real swap. This also provides wiggle room in case your
 # replacement drives do not have the exact same sector counts.
 #
 # Any existing partition layout on $disk is destroyed first. A swap entry is
 # appended to $BSDINSTALL_TMPETC/fstab only when a swap partition was actually
 # created (i.e., $swapsize is greater than zero).
 #
 # Returns $SUCCESS, or $FAILURE on bad arguments, an unsupported
 # $ZFSBOOT_PARTITION_SCHEME, or a failed partitioning command.
 #
 # NOTE: $swapsize and $bootsize should be defined by the calling function.
 # NOTE: Sets $bootpart, $swappart and $targetpart for the calling function.
 # NOTE: May set $ZFSBOOT_BOOT_POOL and $isswapmirror as a side effect.
 #
 zfs_create_diskpart()
 {
 	local funcname=zfs_create_diskpart
 	local disk="$1" index="$2"
 
 	# Check arguments
 	if [ ! "$disk" ]; then
 		f_dprintf "$funcname: NULL disk argument"
 		msg_error="$msg_error: $funcname" \
 			f_show_err "$msg_null_disk_argument"
 		return $FAILURE
 	fi
 	if [ "${disk#*[$IFS]}" != "$disk" ]; then
 		f_dprintf "$funcname: Invalid disk argument \`%s'" "$disk"
 		msg_error="$msg_error: $funcname" \
 			f_show_err "$msg_invalid_disk_argument" "$disk"
 		return $FAILURE
 	fi
 	if [ ! "$index" ]; then
 		f_dprintf "$funcname: NULL index argument"
 		msg_error="$msg_error: $funcname" \
 			f_show_err "$msg_null_index_argument"
 		return $FAILURE
 	fi
 	if ! f_isinteger "$index"; then
 		f_dprintf "$funcname: Invalid index argument \`%s'" "$index"
 		msg_error="$msg_error: $funcname" \
 			f_show_err "$msg_invalid_index_argument" "$index"
 		return $FAILURE
 	fi
 	f_dprintf "$funcname: disk=[%s] index=[%s]" "$disk" "$index"
 
 	# Check for unknown partition scheme before proceeding further
 	case "$ZFSBOOT_PARTITION_SCHEME" in
 	""|MBR|GPT) : known good ;;
 	*)
 		f_dprintf "$funcname: %s is an unsupported partition scheme" \
 		          "$ZFSBOOT_PARTITION_SCHEME"
 		msg_error="$msg_error: $funcname" f_show_err \
 			"$msg_unsupported_partition_scheme" \
 			"$ZFSBOOT_PARTITION_SCHEME"
 		return $FAILURE
 	esac
 
 	#
 	# Destroy whatever partition layout is currently on disk.
 	# NOTE: `-F' required to destroy if partitions still exist.
 	# NOTE: Failure is ok here, blank disk will have nothing to destroy.
 	#
 	f_dprintf "$funcname: Destroying all data/layouts on \`%s'..." "$disk"
 	f_eval_catch -d $funcname gpart "$GPART_DESTROY_F" $disk
 	f_eval_catch -d $funcname graid "$GRAID_DELETE" $disk
 	f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" /dev/$disk
 
 	# Make doubly-sure backup GPT is destroyed
 	f_eval_catch -d $funcname gpart "$GPART_CREATE" gpt $disk
 	f_eval_catch -d $funcname gpart "$GPART_DESTROY_F" $disk
 
 	#
 	# Enable boot pool if encryption is desired
 	#
 	[ "$ZFSBOOT_GELI_ENCRYPTION" ] && ZFSBOOT_BOOT_POOL=1
 
 	#
 	# Lay down the desired type of partition scheme
 	#
 	local setsize mbrindex
 	case "$ZFSBOOT_PARTITION_SCHEME" in
 	""|GPT) f_dprintf "$funcname: Creating GPT layout..."
 		#
 		# 1. Create GPT layout using labels
 		#
 		f_eval_catch $funcname gpart "$GPART_CREATE" gpt $disk ||
 		             return $FAILURE
 
 		#
 		# 2. Add small freebsd-boot partition labeled `gptboot#'
 		#
 		f_eval_catch $funcname gpart "$GPART_ADD_LABEL_WITH_SIZE" \
 		             gptboot$index freebsd-boot 512k $disk ||
 		             return $FAILURE
 		f_eval_catch $funcname gpart "$GPART_BOOTCODE_PART" \
 		             /boot/pmbr /boot/gptzfsboot 1 $disk ||
 		             return $FAILURE
 
 		# NB: zpool will use the `zfs#' GPT labels
 		bootpart=p2 swappart=p2 targetpart=p2
 		[ ${swapsize:-0} -gt 0 ] && targetpart=p3
 
 		#
 		# Prepare boot pool if enabled (e.g., for geli(8))
 		#
 		if [ "$ZFSBOOT_BOOT_POOL" ]; then
 			bootpart=p2 swappart=p3 targetpart=p3
 			[ ${swapsize:-0} -gt 0 ] && targetpart=p4
 			f_eval_catch $funcname gpart \
 			             "$GPART_ADD_LABEL_WITH_SIZE" boot$index \
 			             freebsd-zfs ${bootsize}b $disk ||
 			             return $FAILURE
 			# Pedantically nuke any old labels
 			f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" \
 			                /dev/$disk$bootpart
 			if [ "$ZFSBOOT_GELI_ENCRYPTION" ]; then
 				# Pedantically detach targetpart for later
 				f_eval_catch -d $funcname geli \
 				                "$GELI_DETACH_F" \
 				                /dev/$disk$targetpart
 			fi
 		fi
 
 		#
 		# 3. Add freebsd-swap partition labeled `swap#'
 		#
 		if [ ${swapsize:-0} -gt 0 ]; then
 			f_eval_catch $funcname gpart \
 			             "$GPART_ADD_LABEL_WITH_SIZE" swap$index \
 			             freebsd-swap ${swapsize}b $disk ||
 			             return $FAILURE
 			# Pedantically nuke any old labels on the swap
 			f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" \
 			                /dev/$disk$swappart
 		fi
 
 		#
 		# 4. Add freebsd-zfs partition labeled `zfs#' for zroot
 		#
 		f_eval_catch $funcname gpart "$GPART_ADD_LABEL" \
 		             zfs$index freebsd-zfs $disk || return $FAILURE
 		f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" \
 		                /dev/$disk$targetpart
 		;;
 
 	MBR) f_dprintf "$funcname: Creating MBR layout..."
 		#
 		# 1. Create MBR layout (no labels)
 		#
 		f_eval_catch $funcname gpart "$GPART_CREATE" mbr $disk ||
 		             return $FAILURE
 		f_eval_catch $funcname gpart "$GPART_BOOTCODE" /boot/mbr \
 		             $disk || return $FAILURE
 
 		#
 		# 2. Add freebsd slice with all available space
 		#
 		f_eval_catch $funcname gpart "$GPART_ADD" freebsd $disk ||
 		             return $FAILURE
 		f_eval_catch $funcname gpart "$GPART_SET_ACTIVE" 1 $disk ||
 		             return $FAILURE
 		# Pedantically nuke any old labels
 		f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" \
 		                /dev/${disk}s1
 		# Pedantically nuke any old scheme
 		f_eval_catch -d $funcname gpart "$GPART_DESTROY_F" ${disk}s1
 
 		#
 		# 3. Write BSD scheme to the freebsd slice
 		#
 		f_eval_catch $funcname gpart "$GPART_CREATE" BSD ${disk}s1 ||
 		             return $FAILURE
 
 		# NB: zpool will use s1a (no labels)
 		bootpart=s1a swappart=s1b targetpart=s1d mbrindex=4
 
 		#
 		# Always prepare a boot pool on MBR
 		#
 		ZFSBOOT_BOOT_POOL=1
 		f_eval_catch $funcname gpart \
 		             "$GPART_ADD_INDEX_WITH_SIZE" \
 		             1 freebsd-zfs ${bootsize}b ${disk}s1 ||
 		             return $FAILURE
 		# Pedantically nuke any old labels
 		f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" \
 		                /dev/$disk$bootpart
 		if [ "$ZFSBOOT_GELI_ENCRYPTION" ]; then
 			# Pedantically detach targetpart for later
 			f_eval_catch -d $funcname geli \
 			                "$GELI_DETACH_F" \
 					/dev/$disk$targetpart
 		fi
 
 		#
 		# 4. Add freebsd-swap partition
 		#
 		if [ ${swapsize:-0} -gt 0 ]; then
 			f_eval_catch $funcname gpart \
 			             "$GPART_ADD_INDEX_WITH_SIZE" 2 \
 			             freebsd-swap ${swapsize}b ${disk}s1 ||
 			             return $FAILURE
 			# Pedantically nuke any old labels on the swap
 			f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" \
 			                /dev/${disk}s1b
 		fi
 
 		#
 		# 5. Add freebsd-zfs partition for zroot
 		#
 		f_eval_catch $funcname gpart "$GPART_ADD_INDEX" \
 		             $mbrindex freebsd-zfs ${disk}s1 || return $FAILURE
 		f_eval_catch -d $funcname zpool "$ZPOOL_LABELCLEAR_F" \
 		                /dev/$disk$targetpart # Pedantic
 		f_eval_catch $funcname dd "$DD_WITH_OPTIONS" \
 		             /boot/zfsboot /dev/${disk}s1 count=1 ||
 		             return $FAILURE
 		;;
 
 	esac # $ZFSBOOT_PARTITION_SCHEME
 
 	# Update fstab(5)
 	if [ "$isswapmirror" ]; then
 		: # This is not the first disk in the mirror, do nothing
 	elif [ ${swapsize:-0} -eq 0 ]; then
 		# No swap partition was created above; $swappart would name
 		# some other partition, so it must not be listed as swap
 		: # do nothing
 	elif [ "$ZFSBOOT_SWAP_ENCRYPTION" -a "$ZFSBOOT_SWAP_MIRROR" ]; then
 		f_eval_catch $funcname printf "$PRINTF_FSTAB" \
 		             /dev/mirror/swap.eli none swap sw 0 0 \
 		             $BSDINSTALL_TMPETC/fstab ||
 		             return $FAILURE
 		isswapmirror=1
 	elif [ "$ZFSBOOT_SWAP_MIRROR" ]; then
 		f_eval_catch $funcname printf "$PRINTF_FSTAB" \
 		             /dev/mirror/swap none swap sw 0 0 \
 		             $BSDINSTALL_TMPETC/fstab ||
 		             return $FAILURE
 		isswapmirror=1
 	elif [ "$ZFSBOOT_SWAP_ENCRYPTION" ]; then
 		f_eval_catch $funcname printf "$PRINTF_FSTAB" \
 		             /dev/$disk${swappart}.eli none swap sw 0 0 \
 		             $BSDINSTALL_TMPETC/fstab ||
 		             return $FAILURE
 	else
 		f_eval_catch $funcname printf "$PRINTF_FSTAB" \
 		             /dev/$disk$swappart none swap sw 0 0 \
 		             $BSDINSTALL_TMPETC/fstab ||
 		             return $FAILURE
 	fi
 
 	return $SUCCESS
 }
 
 # zfs_create_boot $poolname $vdev_type $disks ...
 #
 # Creates boot pool and dataset layout. Returns error if something goes wrong.
 # Errors are printed to stderr for collection and display.
 #
 # Each disk is partitioned with zfs_create_diskpart(). A separate boot pool is
 # created when $ZFSBOOT_BOOT_POOL is set, geli(8) providers are initialized
 # when $ZFSBOOT_GELI_ENCRYPTION is set, and swap is mirrored with gmirror(8)
 # when $ZFSBOOT_SWAP_MIRROR is set and the swap size is non-zero. Finally the
 # root pool and the datasets listed in $ZFSBOOT_DATASETS are created and the
 # rc.conf(5)/loader.conf(5) fragments are written beneath $BSDINSTALL_TMPETC
 # and $BSDINSTALL_TMPBOOT.
 #
 # Returns $SUCCESS, or $FAILURE as soon as any required step fails.
 #
 zfs_create_boot()
 {
 	local funcname=zfs_create_boot
 	local zroot_name="$1"
 	local zroot_vdevtype="$2"
 	local zroot_vdevs= # Calculated below
 	local swap_devs= # Calculated below
 	local boot_vdevs= # Used for geli(8) and/or MBR layouts
 	shift 2 # poolname vdev_type
 	local disks="$*" disk
 	local isswapmirror
 	local bootpart targetpart swappart # Set by zfs_create_diskpart() below
 	local create_options
 
 	#
 	# Pedantic checks; should never be seen
 	#
 	if [ ! "$zroot_name" ]; then
 		f_dprintf "$funcname: NULL poolname"
 		msg_error="$msg_error: $funcname" \
 			f_show_err "$msg_null_poolname"
 		return $FAILURE
 	fi
 	if [ $# -lt 1 ]; then
 		f_dprintf "$funcname: missing disk arguments"
 		msg_error="$msg_error: $funcname" \
 			f_show_err "$msg_missing_disk_arguments"
 		return $FAILURE
 	fi
 	f_dprintf "$funcname: poolname=[%s] vdev_type=[%s]" \
 	          "$zroot_name" "$zroot_vdevtype"
 
 	#
 	# Initialize fstab(5)
 	#
 	f_dprintf "$funcname: Initializing temporary fstab(5) file..."
 	f_eval_catch $funcname sh "$SHELL_TRUNCATE" $BSDINSTALL_TMPETC/fstab ||
 	             return $FAILURE
 	f_eval_catch $funcname printf "$PRINTF_FSTAB" \
 	             "# Device" Mountpoint FStype Options Dump "Pass#" \
 	             $BSDINSTALL_TMPETC/fstab || return $FAILURE
 
 	#
 	# Expand SI units in desired sizes
 	#
 	f_dprintf "$funcname: Expanding supplied size values..."
 	local swapsize bootsize
 	if ! f_expand_number "$ZFSBOOT_SWAP_SIZE" swapsize; then
 		f_dprintf "$funcname: Invalid swap size \`%s'" \
 		          "$ZFSBOOT_SWAP_SIZE"
 		f_show_err "$msg_invalid_swap_size" "$ZFSBOOT_SWAP_SIZE"
 		return $FAILURE
 	fi
 	if ! f_expand_number "$ZFSBOOT_BOOT_POOL_SIZE" bootsize; then
 		f_dprintf "$funcname: Invalid boot pool size \`%s'" \
 		          "$ZFSBOOT_BOOT_POOL_SIZE"
 		f_show_err "$msg_invalid_boot_pool_size" \
 		           "$ZFSBOOT_BOOT_POOL_SIZE"
 		return $FAILURE
 	fi
 	f_dprintf "$funcname: ZFSBOOT_SWAP_SIZE=[%s] swapsize=[%s]" \
 	          "$ZFSBOOT_SWAP_SIZE" "$swapsize"
 	f_dprintf "$funcname: ZFSBOOT_BOOT_POOL_SIZE=[%s] bootsize=[%s]" \
 	          "$ZFSBOOT_BOOT_POOL_SIZE" "$bootsize"
 
 	#
 	# Destroy the pool in-case this is our second time 'round (case of
 	# failure and installer presented ``Retry'' option to come back).
 	#
 	# NB: If we don't destroy the pool, later gpart(8) destroy commands
 	# that try to clear existing partitions (see zfs_create_diskpart())
 	# will fail with a `Device Busy' error, leading to `GEOM exists'.
 	#
 	f_eval_catch -d $funcname zpool "$ZPOOL_DESTROY" "$zroot_name"
 
 	#
 	# Prepare the disks and build pool device list(s)
 	#
 	f_dprintf "$funcname: Preparing disk partitions for ZFS pool..."
 
 	# Force 4K sectors using vfs.zfs.min_auto_ashift=12
 	if [ "$ZFSBOOT_FORCE_4K_SECTORS" ]; then
 		f_dprintf "$funcname: With 4K sectors..."
 		f_eval_catch $funcname sysctl "$SYSCTL_ZFS_MIN_ASHIFT_12" \
 		    || return $FAILURE
 	fi
 	local n=0
 	for disk in $disks; do
 		zfs_create_diskpart $disk $n || return $FAILURE
 		# Now $bootpart, $targetpart, and $swappart are set (suffix
 		# for $disk)
 		if [ "$ZFSBOOT_BOOT_POOL" ]; then
 			boot_vdevs="$boot_vdevs $disk$bootpart"
 		fi
 		zroot_vdevs="$zroot_vdevs $disk$targetpart"
 		if [ "$ZFSBOOT_GELI_ENCRYPTION" ]; then
 			zroot_vdevs="$zroot_vdevs.eli"
 		fi
 
 		n=$(( $n + 1 ))
 	done # disks
 
 	#
 	# If we need/want a boot pool, create it
 	#
 	if [ "$ZFSBOOT_BOOT_POOL" ]; then
 		local bootpool_vdevtype= # Calculated below
 		local bootpool_options= # Calculated below
 		local bootpool_name="$ZFSBOOT_BOOT_POOL_NAME"
 		local bootpool="$BSDINSTALL_CHROOT/$bootpool_name"
 		local zroot_key="${ZFSBOOT_GELI_KEY_FILE#/}"
 
 		f_dprintf "$funcname: Setting up boot pool..."
 		[ "$ZFSBOOT_GELI_ENCRYPTION" ] &&
 			f_dprintf "$funcname: For encrypted root disk..."
 
 		# Create parent directory for boot pool
 		f_eval_catch -d $funcname umount "$UMOUNT" /mnt
 		f_eval_catch $funcname mount "$MOUNT_TYPE" tmpfs none \
 		             $BSDINSTALL_CHROOT || return $FAILURE
 
 		# Create mirror across the boot partition on all disks
 		local nvdevs
 		f_count nvdevs $boot_vdevs
 		[ $nvdevs -gt 1 ] && bootpool_vdevtype=mirror
 
 		create_options="$ZFSBOOT_BOOT_POOL_CREATE_OPTIONS"
 		bootpool_options="-o altroot=$BSDINSTALL_CHROOT"
 		bootpool_options="$bootpool_options $create_options"
 		bootpool_options="$bootpool_options -m \"/$bootpool_name\" -f"
 		f_eval_catch $funcname zpool "$ZPOOL_CREATE_WITH_OPTIONS" \
 		             "$bootpool_options" "$bootpool_name" \
 		             "$bootpool_vdevtype" "$boot_vdevs" ||
 		             return $FAILURE
 
 		f_eval_catch $funcname mkdir "$MKDIR_P" "$bootpool/boot" ||
 		             return $FAILURE
 
 		if [ "$ZFSBOOT_GELI_ENCRYPTION" ]; then
 			# Generate an encryption key using random(4)
 			f_eval_catch $funcname dd "$DD_WITH_OPTIONS" \
 			             /dev/random "$bootpool/$zroot_key" \
 			             "bs=4096 count=1" || return $FAILURE
+			f_eval_catch $funcname chmod "$CHMOD_MODE" \
+			             go-wrx "$bootpool/$zroot_key" ||
+			             return $FAILURE
 		else
 			# Clean up
 			f_eval_catch $funcname zfs "$ZFS_UNMOUNT" \
 			             "$bootpool_name" || return $FAILURE
 			f_eval_catch -d $funcname umount "$UMOUNT" /mnt # tmpfs
 		fi
 
 	fi
 
 	#
 	# Create the geli(8) GEOMS
 	#
 	if [ "$ZFSBOOT_GELI_ENCRYPTION" ]; then
 		# Prompt user for password (twice)
 		if ! msg_enter_new_password="$msg_geli_password" \
 			f_dialog_input_password
 		then
 			f_dprintf "$funcname: User cancelled"
 			f_show_err "$msg_user_cancelled"
 			return $FAILURE
 		fi
 
 		# Initialize geli(8) on each of the target partitions
 		for disk in $disks; do
 			f_dialog_info "$msg_geli_setup" \
 				2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD
 			if ! echo "$pw_password" | f_eval_catch \
 				$funcname geli "$GELI_PASSWORD_INIT" \
 				"$bootpool/boot/$disk$targetpart.eli" \
 				AES-XTS "$bootpool/$zroot_key" \
 				$disk$targetpart
 			then
 				f_interactive || f_die
 				unset pw_password # Sensitive info
 				return $FAILURE
 			fi
 			if ! echo "$pw_password" | f_eval_catch \
 				$funcname geli "$GELI_ATTACH" \
 				"$bootpool/$zroot_key" $disk$targetpart
 			then
 				f_interactive || f_die
 				unset pw_password # Sensitive info
 				return $FAILURE
 			fi
 		done
 		unset pw_password # Sensitive info
 
 		# Clean up
 		f_eval_catch $funcname zfs "$ZFS_UNMOUNT" "$bootpool_name" ||
 			return $FAILURE
 		f_eval_catch -d $funcname umount "$UMOUNT" /mnt # tmpfs
 	fi
 
 	#
 	# Create the gmirror(8) GEOMS for swap
 	# NB: Only when swap partitions exist; with a zero swap size
 	# $swappart does not refer to a swap partition
 	#
 	if [ "$ZFSBOOT_SWAP_MIRROR" ] && [ ${swapsize:-0} -gt 0 ]; then
 		for disk in $disks; do
 			swap_devs="$swap_devs $disk$swappart"
 		done
 		f_eval_catch $funcname gmirror "$SWAP_GMIRROR_LABEL" \
 			"$swap_devs" || return $FAILURE
 	fi
 
 	#
 	# Create the ZFS root pool with desired type and disk devices
 	#
 	f_dprintf "$funcname: Creating root pool..."
 	create_options="$ZFSBOOT_POOL_CREATE_OPTIONS"
 	f_eval_catch $funcname zpool "$ZPOOL_CREATE_WITH_OPTIONS" \
 		"-o altroot=$BSDINSTALL_CHROOT $create_options -m none -f" \
 		"$zroot_name" "$zroot_vdevtype" "$zroot_vdevs" ||
 		return $FAILURE
 
 	#
 	# Create ZFS dataset layout within the new root pool
 	#
 	# NB: The loop is the tail of a pipeline and so runs in a subshell;
 	# a `return' inside it only ends that subshell, hence the status of
 	# the whole pipeline is checked after `done'.
 	#
 	f_dprintf "$funcname: Creating ZFS datasets..."
 	echo "$ZFSBOOT_DATASETS" | while read dataset options; do
 		# Skip blank lines and comments
 		case "$dataset" in "#"*|"") continue; esac
 		# Remove potential inline comments in options
 		options="${options%%#*}"
 		# Replace tabs with spaces
 		f_replaceall "$options" "	" " " options
 		# Reduce contiguous runs of space to one single space
 		oldoptions=
 		while [ "$oldoptions" != "$options" ]; do
 			oldoptions="$options"
 			f_replaceall "$options" "  " " " options
 		done
 		# Replace both commas and spaces with ` -o '
 		f_replaceall "$options" "[ ,]" " -o " options
 		# Create the dataset with desired options
 		f_eval_catch $funcname zfs "$ZFS_CREATE_WITH_OPTIONS" \
 		             "${options:+-o $options}" "$zroot_name$dataset" ||
 		             return $FAILURE
 	done || return $FAILURE
 
 	# Touch up permissions on the tmp directories
 	f_dprintf "$funcname: Modifying directory permissions..."
 	local dir
 	for dir in /tmp /var/tmp; do
 		f_eval_catch $funcname chmod "$CHMOD_MODE" 1777 \
 		             $BSDINSTALL_CHROOT$dir || return $FAILURE
 	done
 
 	# Create symlink(s)
 	if [ "$ZFSBOOT_BOOT_POOL" ]; then
 		f_dprintf "$funcname: Creating /boot symlink for boot pool..."
 		f_eval_catch $funcname ln "$LN_SF" "$bootpool_name/boot" \
 		             $BSDINSTALL_CHROOT/boot || return $FAILURE
 	fi
 
 	# Set bootfs property
 	local zroot_bootfs="$ZFSBOOT_BEROOT_NAME/$ZFSBOOT_BOOTFS_NAME"
 	f_dprintf "$funcname: Setting bootfs property..."
 	f_eval_catch $funcname zpool "$ZPOOL_SET" \
 		"bootfs=\"$zroot_name/$zroot_bootfs\"" "$zroot_name" ||
 		return $FAILURE
 
 	# Export the pool(s)
 	f_dprintf "$funcname: Temporarily exporting ZFS pool(s)..."
 	f_eval_catch $funcname zpool "$ZPOOL_EXPORT" "$zroot_name" ||
 	             return $FAILURE
 	if [ "$ZFSBOOT_BOOT_POOL" ]; then
 		f_eval_catch $funcname zpool "$ZPOOL_EXPORT" \
 		             "$bootpool_name" || return $FAILURE
 	fi
 
 	# MBR boot loader touch-up
 	if [ "$ZFSBOOT_PARTITION_SCHEME" = "MBR" ]; then
 		f_dprintf "$funcname: Updating MBR boot loader on disks..."
 		# Stick the ZFS boot loader in the "convenient hole" after
 		# the ZFS internal metadata
 		for disk in $disks; do
 			f_eval_catch $funcname dd "$DD_WITH_OPTIONS" \
 			             /boot/zfsboot /dev/$disk$bootpart \
 			             "skip=1 seek=1024" || return $FAILURE
 		done
 	fi
 
 	# Re-import the ZFS pool(s)
 	f_dprintf "$funcname: Re-importing ZFS pool(s)..."
 	f_eval_catch $funcname zpool "$ZPOOL_IMPORT_WITH_OPTIONS" \
 	             "-o altroot=\"$BSDINSTALL_CHROOT\"" "$zroot_name" ||
 	             return $FAILURE
 	if [ "$ZFSBOOT_BOOT_POOL" ]; then
 		f_eval_catch $funcname zpool "$ZPOOL_IMPORT_WITH_OPTIONS" \
 		             "-o altroot=\"$BSDINSTALL_CHROOT\"" \
 		             "$bootpool_name" || return $FAILURE
 	fi
 
 	# While this is apparently not needed, it seems to help MBR
 	f_dprintf "$funcname: Configuring zpool.cache for zroot..."
 	f_eval_catch $funcname mkdir "$MKDIR_P" $BSDINSTALL_CHROOT/boot/zfs ||
 	             return $FAILURE
 	f_eval_catch $funcname zpool "$ZPOOL_SET" \
 	             "cachefile=\"$BSDINSTALL_CHROOT/boot/zfs/zpool.cache\"" \
 	             "$zroot_name" || return $FAILURE
 
 	# Last, but not least... required lines for rc.conf(5)/loader.conf(5)
 	# NOTE: We later concatenate these into their destination
 	f_dprintf "%s: Configuring rc.conf(5)/loader.conf(5) additions..." \
 	          "$funcname"
 	f_eval_catch $funcname echo "$ECHO_APPEND" 'zfs_enable=\"YES\"' \
 	             $BSDINSTALL_TMPETC/rc.conf.zfs || return $FAILURE
 	f_eval_catch $funcname echo "$ECHO_APPEND" \
 	             'kern.geom.label.disk_ident.enable=\"0\"' \
 	             $BSDINSTALL_TMPBOOT/loader.conf.zfs || return $FAILURE
 	f_eval_catch $funcname echo "$ECHO_APPEND" \
 	             'kern.geom.label.gptid.enable=\"0\"' \
 	             $BSDINSTALL_TMPBOOT/loader.conf.zfs || return $FAILURE
 
 	if [ "$ZFSBOOT_SWAP_MIRROR" ]; then
 		f_eval_catch $funcname echo "$ECHO_APPEND" \
 		             'geom_mirror_load=\"YES\"' \
 		             $BSDINSTALL_TMPBOOT/loader.conf.gmirror ||
 		             return $FAILURE
 	fi
 
 	# We're all done unless we should go on for boot pool
 	[ "$ZFSBOOT_BOOT_POOL" ] || return $SUCCESS
 
 	# Set cachefile for boot pool so it auto-imports at system start
 	f_dprintf "$funcname: Configuring zpool.cache for boot pool..."
 	f_eval_catch $funcname zpool "$ZPOOL_SET" \
 	             "cachefile=\"$BSDINSTALL_CHROOT/boot/zfs/zpool.cache\"" \
 	             "$bootpool_name" || return $FAILURE
 
 	# Some additional geli(8) requirements for loader.conf(5)
 	local option
 	for option in \
 		'zpool_cache_load=\"YES\"' \
 		'zpool_cache_type=\"/boot/zfs/zpool.cache\"' \
 		'zpool_cache_name=\"/boot/zfs/zpool.cache\"' \
 	; do
 		f_eval_catch $funcname echo "$ECHO_APPEND" "$option" \
 		             $BSDINSTALL_TMPBOOT/loader.conf.zfs ||
 		             return $FAILURE
 	done
 	f_eval_catch $funcname printf "$PRINTF_CONF" vfs.root.mountfrom \
 		"\"zfs:$zroot_name/$zroot_bootfs\"" \
 		$BSDINSTALL_TMPBOOT/loader.conf.root || return $FAILURE
 
 	# We're all done unless we should go on to do encryption
 	[ "$ZFSBOOT_GELI_ENCRYPTION" ] || return $SUCCESS
 
 	#
 	# Configure geli(8)-based encryption
 	#
 	f_dprintf "$funcname: Configuring disk encryption..."
 	f_eval_catch $funcname echo "$ECHO_APPEND" 'aesni_load=\"YES\"' \
 		$BSDINSTALL_TMPBOOT/loader.conf.aesni || return $FAILURE
 	f_eval_catch $funcname echo "$ECHO_APPEND" 'geom_eli_load=\"YES\"' \
 		$BSDINSTALL_TMPBOOT/loader.conf.geli || return $FAILURE
 	for disk in $disks; do
 		f_eval_catch $funcname printf "$PRINTF_CONF" \
 			geli_%s_keyfile0_load "$disk$targetpart YES" \
 			$BSDINSTALL_TMPBOOT/loader.conf.$disk$targetpart ||
 			return $FAILURE
 		f_eval_catch $funcname printf "$PRINTF_CONF" \
 			geli_%s_keyfile0_type \
 			"$disk$targetpart $disk$targetpart:geli_keyfile0" \
 			$BSDINSTALL_TMPBOOT/loader.conf.$disk$targetpart ||
 			return $FAILURE
 		f_eval_catch $funcname printf "$PRINTF_CONF" \
 			geli_%s_keyfile0_name \
 			"$disk$targetpart \"$ZFSBOOT_GELI_KEY_FILE\"" \
 			$BSDINSTALL_TMPBOOT/loader.conf.$disk$targetpart ||
 			return $FAILURE
 	done
 
 	return $SUCCESS
 }
 
 # dialog_menu_diskinfo
 #
 # Repeatedly offer a menu of disk devices; for each disk chosen, display the
 # output of gpart(8) `show' plus camcontrol(8) `inquiry' and `identify' for
 # it. Returns $SUCCESS once the device menu reports failure (for example when
 # the user selects the back/cancel button).
 #
 dialog_menu_diskinfo()
 {
 	local device disk
 	local layout inquiry identify
 
 	while true; do
 		# A failing menu (cancel/back) is our cue to leave the loop
 		if ! device=$( msg_cancel="$msg_back" f_device_menu \
 			"$DIALOG_TITLE" "$msg_select_a_disk_device" "" \
 			$DEVICE_TYPE_DISK 2>&1 )
 		then
 			break
 		fi
 
 		# Resolve the selected device handle to its disk name
 		$device get name disk
 
 		# Collect the per-disk reports (errors are discarded so that a
 		# tool that does not apply to this disk yields empty text)
 		layout=$( gpart show $disk 2> /dev/null )
 		inquiry=$( camcontrol inquiry $disk 2> /dev/null )
 		identify=$( camcontrol identify $disk 2> /dev/null )
 
 		f_show_msg "$msg_detailed_disk_info" \
 			"$disk" "$layout" \
 			"$disk" "$inquiry" \
 			"$disk" "$identify"
 	done
 
 	return $SUCCESS
 }
 
 ############################################################ MAIN
 
 #
 # Set up the dialog title/backtitle used by every prompt below
 #
 f_dialog_title "$msg_zfs_configuration"
 f_dialog_backtitle "$msg_freebsd_installer"
 
 # When running non-interactively but ZFS operations were specifically
 # requested to be interactive, drop the non-interactive setting
 if ! f_interactive && f_zfsinteractive; then
 	unset $VAR_NONINTERACTIVE
 fi
 
 #
 # Record the environment we were handed (debug output only)
 #
 f_dprintf "BSDINSTALL_CHROOT=[%s]" "$BSDINSTALL_CHROOT"
 f_dprintf "BSDINSTALL_TMPETC=[%s]" "$BSDINSTALL_TMPETC"
 f_dprintf "FSTAB_FMT=[%s]" "$FSTAB_FMT"
 
 #
 # Interactive runs only: when the machine did not boot via BIOS (e.g. UEFI),
 # warn the user that FreeBSD can't do ZFS with UEFI yet and bail out unless
 # they explicitly accept
 #
 if f_interactive; then
 	bootmethod=$( sysctl -n machdep.bootmethod )
 	f_dprintf "machdep.bootmethod=[%s]" "$bootmethod"
 	case "$bootmethod" in
 	BIOS)
 		: # nothing to warn about
 		;;
 	*)
 		dialog_uefi_prompt
 		retval=$?
 		f_dprintf "uefi_prompt=[%s]" "$retval"
 		if ! [ $retval -eq $DIALOG_OK ]; then
 			f_die
 		fi
 		;;
 	esac
 fi
 
 #
 # Loop over the main menu until we've accomplished what we came here to do.
 # Non-interactive runs skip the menu and behave as if "install" was chosen.
 # Any menu result other than $DIALOG_OK terminates via f_die.
 #
 while :; do
 	if ! f_interactive; then
 		retval=$DIALOG_OK
 		mtag=">>> $msg_install"
 	else
 		dialog_menu_main
 		retval=$?
 		f_dialog_menutag_fetch mtag
 	fi
 
 	f_dprintf "retval=%u mtag=[%s]" $retval "$mtag"
 	[ $retval -eq $DIALOG_OK ] || f_die
 
 	case "$mtag" in
 	">>> $msg_install")
 		#
 		# First, validate the user's selections
 		#
 
 		# Make sure they gave us a name for the pool
 		if [ ! "$ZFSBOOT_POOL_NAME" ]; then
 			f_dprintf "Pool name cannot be empty."
 			f_show_err "$msg_pool_name_cannot_be_empty"
 			continue
 		fi
 
 		# Validate vdev type against number of disks selected/scripted
 		# (also validates that ZFSBOOT_DISKS are real [probed] disks)
 		# NB: dialog_menu_layout supports running non-interactively
 		dialog_menu_layout || continue
 
 		# Make sure each disk will be at least 50% ZFS: what remains
 		# after reserving swap (and the boot pool, if enabled) must
 		# not be smaller than the reserved amount itself
 		if f_expand_number "$ZFSBOOT_SWAP_SIZE" swapsize &&
 		   f_expand_number "$ZFSBOOT_BOOT_POOL_SIZE" bootsize
 		then
 			minsize=$swapsize teeny_disks=
 			[ "$ZFSBOOT_BOOT_POOL" ] &&
 				minsize=$(( $minsize + $bootsize ))
 			for disk in $ZFSBOOT_DISKS; do
 				debug= f_device_find -1 \
 					$disk $DEVICE_TYPE_DISK device
 				# Disks whose capacity cannot be queried are
 				# skipped rather than reported as too small
 				$device get capacity disksize || continue
 				[ ${disksize:-0} -ge 0 ] || disksize=0
 				disksize=$(( $disksize - $minsize ))
 				[ $disksize -lt $minsize ] &&
 					teeny_disks="$teeny_disks $disk"
 			done
 			if [ "$teeny_disks" ]; then
 				f_dprintf "swapsize=[%s] bootsize=[%s] %s" \
 				          "$ZFSBOOT_SWAP_SIZE" \
 				          "$ZFSBOOT_BOOT_POOL_SIZE" \
 				          "minsize=[$minsize]"
 				f_dprintf "These disks are too small: %s" \
 				          "$teeny_disks"
 				f_show_err "$msg_these_disks_are_too_small" \
 				           "$ZFSBOOT_SWAP_SIZE" \
 				           "$ZFSBOOT_BOOT_POOL_SIZE" \
 				           "$teeny_disks"
 				continue
 			fi
 		fi
 
 		#
 		# Last Chance!
 		#
 		if f_interactive; then
 			dialog_last_chance $ZFSBOOT_DISKS || continue
 		fi
 
 		#
 		# Let's do this
 		#
 
 		vdev_type="$ZFSBOOT_VDEV_TYPE"
 
 		# Blank the vdev type for the default layout
 		[ "$vdev_type" = "stripe" ] && vdev_type=
 
 		# On failure, fall back to the menu instead of exiting
 		zfs_create_boot "$ZFSBOOT_POOL_NAME" \
 		                "$vdev_type" $ZFSBOOT_DISKS || continue
 
 		break # to success
 		;;
 	?" $msg_pool_type_disks")
 		ZFSBOOT_CONFIRM_LAYOUT=1
 		dialog_menu_layout
 		# User has poked settings, disable later confirmation
 		ZFSBOOT_CONFIRM_LAYOUT=
 		;;
 	"- $msg_rescan_devices") f_device_rescan ;;
 	"- $msg_disk_info") dialog_menu_diskinfo ;;
 	?" $msg_pool_name")
 		# Prompt the user to input/change the name for the new pool
 		f_dialog_input input \
 			"$msg_please_enter_a_name_for_your_zpool" \
 			"$ZFSBOOT_POOL_NAME" &&
 			ZFSBOOT_POOL_NAME="$input"
 		;;
 	?" $msg_force_4k_sectors")
 		# Toggle the variable referenced both by the menu and later
 		if [ "$ZFSBOOT_FORCE_4K_SECTORS" ]; then
 			ZFSBOOT_FORCE_4K_SECTORS=
 		else
 			ZFSBOOT_FORCE_4K_SECTORS=1
 		fi
 		;;
 	?" $msg_encrypt_disks")
 		# Toggle the variable referenced both by the menu and later
 		# (turning encryption on also turns on forced 4K sectors)
 		if [ "$ZFSBOOT_GELI_ENCRYPTION" ]; then
 			ZFSBOOT_GELI_ENCRYPTION=
 		else
 			ZFSBOOT_FORCE_4K_SECTORS=1
 			ZFSBOOT_GELI_ENCRYPTION=1
 		fi
 		;;
 	?" $msg_partition_scheme")
 		# Toggle between GPT and MBR
 		if [ "$ZFSBOOT_PARTITION_SCHEME" = GPT ]; then
 			ZFSBOOT_PARTITION_SCHEME=MBR
 		else
 			ZFSBOOT_PARTITION_SCHEME=GPT
 		fi
 		;;
 	?" $msg_swap_size")
 		# Prompt the user to input/change the swap size for each disk
 		# (an empty answer is stored as 0)
 		f_dialog_input input \
 			"$msg_please_enter_amount_of_swap_space" \
 			"$ZFSBOOT_SWAP_SIZE" &&
 			ZFSBOOT_SWAP_SIZE="${input:-0}"
 		;;
 	?" $msg_swap_mirror")
 		# Toggle the variable referenced both by the menu and later
 		if [ "$ZFSBOOT_SWAP_MIRROR" ]; then
 			ZFSBOOT_SWAP_MIRROR=
 		else
 			ZFSBOOT_SWAP_MIRROR=1
 		fi
 		;;
 	?" $msg_swap_encrypt")
 		# Toggle the variable referenced both by the menu and later
 		if [ "$ZFSBOOT_SWAP_ENCRYPTION" ]; then
 			ZFSBOOT_SWAP_ENCRYPTION=
 		else
 			ZFSBOOT_SWAP_ENCRYPTION=1
 		fi
 		;;
 	esac
 done
 
 return $SUCCESS
 
 ################################################################################
 # END
 ################################################################################
Index: projects/ifnet/usr.sbin/ctld/ctld.c
===================================================================
--- projects/ifnet/usr.sbin/ctld/ctld.c	(revision 281172)
+++ projects/ifnet/usr.sbin/ctld/ctld.c	(revision 281173)
@@ -1,2597 +1,2600 @@
 /*-
  * Copyright (c) 2012 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Edward Tomasz Napierala under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <netdb.h>
 #include <signal.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 #include "ctld.h"
 #include "isns.h"
 
 /* Externally visible flag; starts out disabled. */
 bool proxy_mode = false;
 
 /*
  * Event flags, one per signal name.  NOTE(review): presumably set from
  * signal handlers defined elsewhere in this file -- confirm.
  */
 static volatile bool sighup_received = false;
 static volatile bool sigterm_received = false;
 static volatile bool sigalrm_received = false;
 
 /* NOTE(review): looks like a count of live child processes -- confirm. */
 static int nchildren = 0;
 /*
  * NOTE(review): presumably the most recently handed-out portal group tag
  * (portal_group_new() leaves pg_tag at 0 for later assignment) -- confirm.
  */
 static uint16_t last_portal_group_tag = 0;
 
 /*
  * Print the command-line synopsis on stderr and exit with status 1.
  */
 static void
 usage(void)
 {
 	static const char synopsis[] = "usage: ctld [-d][-f config-file]\n";
 
 	fputs(synopsis, stderr);
 	exit(1);
 }
 
 /*
  * strdup(3) wrapper: duplicates "s" and reports allocation failure through
  * log_err(1, ...).  The duplicate is owned by the caller.
  */
 char *
 checked_strdup(const char *s)
 {
 	char *copy = strdup(s);
 
 	if (copy != NULL)
 		return (copy);
 
 	log_err(1, "strdup");
 	return (copy);
 }
 
 /*
  * Allocate a zero-filled configuration, set its default tunables and
  * initialise every list head so the lists start out empty.
  */
 struct conf *
 conf_new(void)
 {
 	struct conf *c;
 
 	if ((c = calloc(1, sizeof(*c))) == NULL)
 		log_err(1, "calloc");
 
 	/* Default tunables. */
 	c->conf_debug = 0;
 	c->conf_timeout = 60;
 	c->conf_maxproc = 30;
 	c->conf_isns_period = 900;
 	c->conf_isns_timeout = 5;
 
 	/* Empty object lists. */
 	TAILQ_INIT(&c->conf_luns);
 	TAILQ_INIT(&c->conf_targets);
 	TAILQ_INIT(&c->conf_auth_groups);
 	TAILQ_INIT(&c->conf_ports);
 	TAILQ_INIT(&c->conf_portal_groups);
 	TAILQ_INIT(&c->conf_pports);
 	TAILQ_INIT(&c->conf_isns);
 
 	return (c);
 }
 
 void
 conf_delete(struct conf *conf)
 {
 	struct lun *lun, *ltmp;
 	struct target *targ, *tmp;
 	struct auth_group *ag, *cagtmp;
 	struct portal_group *pg, *cpgtmp;
 	struct pport *pp, *pptmp;
 	struct isns *is, *istmp;
 
 	assert(conf->conf_pidfh == NULL);
 
 	TAILQ_FOREACH_SAFE(lun, &conf->conf_luns, l_next, ltmp)
 		lun_delete(lun);
 	TAILQ_FOREACH_SAFE(targ, &conf->conf_targets, t_next, tmp)
 		target_delete(targ);
 	TAILQ_FOREACH_SAFE(ag, &conf->conf_auth_groups, ag_next, cagtmp)
 		auth_group_delete(ag);
 	TAILQ_FOREACH_SAFE(pg, &conf->conf_portal_groups, pg_next, cpgtmp)
 		portal_group_delete(pg);
 	TAILQ_FOREACH_SAFE(pp, &conf->conf_pports, pp_next, pptmp)
 		pport_delete(pp);
 	TAILQ_FOREACH_SAFE(is, &conf->conf_isns, i_next, istmp)
 		isns_delete(is);
 	assert(TAILQ_EMPTY(&conf->conf_ports));
 	free(conf->conf_pidfile_path);
 	free(conf);
 }
 
 /*
  * Allocate an empty auth entry and append it to the auth list of "ag".
  * The credential strings are left NULL for the caller to fill in.
  */
 static struct auth *
 auth_new(struct auth_group *ag)
 {
 	struct auth *entry;
 
 	if ((entry = calloc(1, sizeof(*entry))) == NULL)
 		log_err(1, "calloc");
 
 	entry->a_auth_group = ag;
 	TAILQ_INSERT_TAIL(&ag->ag_auths, entry, a_next);
 
 	return (entry);
 }
 
 static void
 auth_delete(struct auth *auth)
 {
 	TAILQ_REMOVE(&auth->a_auth_group->ag_auths, auth, a_next);
 
 	free(auth->a_user);
 	free(auth->a_secret);
 	free(auth->a_mutual_user);
 	free(auth->a_mutual_secret);
 	free(auth);
 }
 
 /*
  * Return the first auth entry of "ag" whose user name equals "user"
  * (exact strcmp(3) match), or NULL when there is none.
  */
 const struct auth *
 auth_find(const struct auth_group *ag, const char *user)
 {
 	const struct auth *cur;
 
 	TAILQ_FOREACH(cur, &ag->ag_auths, a_next) {
 		if (strcmp(cur->a_user, user) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 static void
 auth_check_secret_length(struct auth *auth)
 {
 	size_t len;
 
 	len = strlen(auth->a_secret);
 	if (len > 16) {
 		if (auth->a_auth_group->ag_name != NULL)
 			log_warnx("secret for user \"%s\", auth-group \"%s\", "
 			    "is too long; it should be at most 16 characters "
 			    "long", auth->a_user, auth->a_auth_group->ag_name);
 		else
 			log_warnx("secret for user \"%s\", target \"%s\", "
 			    "is too long; it should be at most 16 characters "
 			    "long", auth->a_user,
 			    auth->a_auth_group->ag_target->t_name);
 	}
 	if (len < 12) {
 		if (auth->a_auth_group->ag_name != NULL)
 			log_warnx("secret for user \"%s\", auth-group \"%s\", "
 			    "is too short; it should be at least 12 characters "
 			    "long", auth->a_user,
 			    auth->a_auth_group->ag_name);
 		else
 			log_warnx("secret for user \"%s\", target \"%s\", "
 			    "is too short; it should be at least 16 characters "
 			    "long", auth->a_user,
 			    auth->a_auth_group->ag_target->t_name);
 	}
 
 	if (auth->a_mutual_secret != NULL) {
 		len = strlen(auth->a_mutual_secret);
 		if (len > 16) {
 			if (auth->a_auth_group->ag_name != NULL)
 				log_warnx("mutual secret for user \"%s\", "
 				    "auth-group \"%s\", is too long; it should "
 				    "be at most 16 characters long",
 				    auth->a_user, auth->a_auth_group->ag_name);
 			else
 				log_warnx("mutual secret for user \"%s\", "
 				    "target \"%s\", is too long; it should "
 				    "be at most 16 characters long",
 				    auth->a_user,
 				    auth->a_auth_group->ag_target->t_name);
 		}
 		if (len < 12) {
 			if (auth->a_auth_group->ag_name != NULL)
 				log_warnx("mutual secret for user \"%s\", "
 				    "auth-group \"%s\", is too short; it "
 				    "should be at least 12 characters long",
 				    auth->a_user, auth->a_auth_group->ag_name);
 			else
 				log_warnx("mutual secret for user \"%s\", "
 				    "target \"%s\", is too short; it should be "
 				    "at least 16 characters long",
 				    auth->a_user,
 				    auth->a_auth_group->ag_target->t_name);
 		}
 	}
 }
 
 /*
  * Add a one-way CHAP credential (user/secret) to "ag".
  *
  * A group whose type is still unknown becomes a CHAP group.  A group that
  * already has another type is left untouched: a warning is logged and NULL
  * is returned.  On success the new entry is returned; the strings are
  * copied.
  */
 const struct auth *
 auth_new_chap(struct auth_group *ag, const char *user,
     const char *secret)
 {
 	struct auth *entry;
 
 	if (ag->ag_type == AG_TYPE_UNKNOWN) {
 		ag->ag_type = AG_TYPE_CHAP;
 	} else if (ag->ag_type != AG_TYPE_CHAP) {
 		if (ag->ag_name == NULL)
 			log_warnx("cannot mix \"chap\" authentication with "
 			    "other types for target \"%s\"",
 			    ag->ag_target->t_name);
 		else
 			log_warnx("cannot mix \"chap\" authentication with "
 			    "other types for auth-group \"%s\"", ag->ag_name);
 		return (NULL);
 	}
 
 	entry = auth_new(ag);
 	entry->a_user = checked_strdup(user);
 	entry->a_secret = checked_strdup(secret);
 
 	/* Length problems are only warned about, never rejected. */
 	auth_check_secret_length(entry);
 
 	return (entry);
 }
 
 /*
  * Add a mutual CHAP credential to "ag": user/secret plus the second pair
  * user2/secret2.
  *
  * A group whose type is still unknown becomes a chap-mutual group.  A group
  * that already has another type is left untouched: a warning is logged and
  * NULL is returned.  On success the new entry is returned; all strings are
  * copied.
  */
 const struct auth *
 auth_new_chap_mutual(struct auth_group *ag, const char *user,
     const char *secret, const char *user2, const char *secret2)
 {
 	struct auth *entry;
 
 	if (ag->ag_type == AG_TYPE_UNKNOWN) {
 		ag->ag_type = AG_TYPE_CHAP_MUTUAL;
 	} else if (ag->ag_type != AG_TYPE_CHAP_MUTUAL) {
 		if (ag->ag_name == NULL)
 			log_warnx("cannot mix \"chap-mutual\" authentication "
 			    "with other types for target \"%s\"",
 			    ag->ag_target->t_name);
 		else
 			log_warnx("cannot mix \"chap-mutual\" authentication "
 			    "with other types for auth-group \"%s\"",
 			    ag->ag_name);
 		return (NULL);
 	}
 
 	entry = auth_new(ag);
 	entry->a_user = checked_strdup(user);
 	entry->a_secret = checked_strdup(secret);
 	entry->a_mutual_user = checked_strdup(user2);
 	entry->a_mutual_secret = checked_strdup(secret2);
 
 	/* Length problems are only warned about, never rejected. */
 	auth_check_secret_length(entry);
 
 	return (entry);
 }
 
 /*
  * Append an initiator-name entry holding a copy of "name" to the name list
  * of "ag" and return it.
  */
 const struct auth_name *
 auth_name_new(struct auth_group *ag, const char *name)
 {
 	struct auth_name *entry;
 
 	if ((entry = calloc(1, sizeof(*entry))) == NULL)
 		log_err(1, "calloc");
 
 	entry->an_auth_group = ag;
 	entry->an_initator_name = checked_strdup(name);
 	TAILQ_INSERT_TAIL(&ag->ag_names, entry, an_next);
 
 	return (entry);
 }
 
 static void
 auth_name_delete(struct auth_name *an)
 {
 	TAILQ_REMOVE(&an->an_auth_group->ag_names, an, an_next);
 
 	free(an->an_initator_name);
 	free(an);
 }
 
 /*
  * Tell whether "ag" lists at least one initiator name.
  */
 bool
 auth_name_defined(const struct auth_group *ag)
 {
 
 	return (TAILQ_EMPTY(&ag->ag_names) ? false : true);
 }
 
 /*
  * Return the first initiator-name entry of "ag" that equals "name"
  * (exact strcmp(3) match), or NULL when there is none.
  */
 const struct auth_name *
 auth_name_find(const struct auth_group *ag, const char *name)
 {
 	const struct auth_name *cur;
 
 	TAILQ_FOREACH(cur, &ag->ag_names, an_next) {
 		if (strcmp(cur->an_initator_name, name) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Check "initiator_name" against the names listed in "ag".
  *
  * Returns 0 when the group lists no names at all or when the name is
  * listed, and 1 when names are listed but this one is not among them.
  */
 int
 auth_name_check(const struct auth_group *ag, const char *initiator_name)
 {
 
 	if (auth_name_defined(ag) &&
 	    auth_name_find(ag, initiator_name) == NULL)
 		return (1);
 
 	return (0);
 }
 
 /*
  * Parse an initiator-portal specification and append it to "ag".
  *
  * "portal" is an IPv4 or IPv6 address, optionally wrapped in square
  * brackets and optionally followed by "/prefix-length".  An address
  * containing ':' is parsed as IPv6, anything else as IPv4.  Without a
  * prefix length the full address width (32 or 128 bits) is used.
  *
  * Returns the new entry.  Malformed input is reported with log_errx(1, ...).
  *
  * Differences from the previous version:
  *  - an empty prefix length ("10.0.0.0/") is rejected; it used to be
  *    accepted as /0, i.e. a portal matching every address;
  *  - the prefix length is range-checked as a long before being narrowed,
  *    so huge values can no longer wrap into the valid range;
  *  - the error path releases the temporary and saved copies of "portal".
  */
 const struct auth_portal *
 auth_portal_new(struct auth_group *ag, const char *portal)
 {
 	struct auth_portal *ap;
 	char *net, *mask, *str, *tmp;
 	int len, dm;
 	long m;
 
 	ap = calloc(1, sizeof(*ap));
 	if (ap == NULL)
 		log_err(1, "calloc");
 	ap->ap_auth_group = ag;
 	ap->ap_initator_portal = checked_strdup(portal);
 
 	/* Work on a scratch copy: strsep(3) and the ']' strip modify it. */
 	mask = str = checked_strdup(portal);
 	net = strsep(&mask, "/");
 	if (net[0] == '[')
 		net++;
 	len = strlen(net);
 	if (len == 0)
 		goto error;
 	if (net[len - 1] == ']')
 		net[len - 1] = 0;
 	if (strchr(net, ':') != NULL) {
 		struct sockaddr_in6 *sin6 =
 		    (struct sockaddr_in6 *)&ap->ap_sa;
 
 		sin6->sin6_len = sizeof(*sin6);
 		sin6->sin6_family = AF_INET6;
 		if (inet_pton(AF_INET6, net, &sin6->sin6_addr) <= 0)
 			goto error;
 		dm = 128;
 	} else {
 		struct sockaddr_in *sin =
 		    (struct sockaddr_in *)&ap->ap_sa;
 
 		sin->sin_len = sizeof(*sin);
 		sin->sin_family = AF_INET;
 		if (inet_pton(AF_INET, net, &sin->sin_addr) <= 0)
 			goto error;
 		dm = 32;
 	}
 	if (mask != NULL) {
 		m = strtol(mask, &tmp, 0);
 		/* tmp == mask: no digits were consumed at all. */
 		if (tmp == mask || tmp[0] != 0 || m < 0 || m > dm)
 			goto error;
 	} else
 		m = dm;
 	ap->ap_mask = (int)m;
 	free(str);
 	TAILQ_INSERT_TAIL(&ag->ag_portals, ap, ap_next);
 	return (ap);
 
 error:
 	free(str);
 	free(ap->ap_initator_portal);
 	free(ap);
 	log_errx(1, "Incorrect initiator portal '%s'", portal);
 	return (NULL);
 }
 
 static void
 auth_portal_delete(struct auth_portal *ap)
 {
 	TAILQ_REMOVE(&ap->ap_auth_group->ag_portals, ap, ap_next);
 
 	free(ap->ap_initator_portal);
 	free(ap);
 }
 
 /*
  * Tell whether "ag" lists at least one initiator portal.
  */
 bool
 auth_portal_defined(const struct auth_group *ag)
 {
 
 	return (TAILQ_EMPTY(&ag->ag_portals) ? false : true);
 }
 
 /*
  * Return the first initiator-portal entry of "ag" that covers address "ss",
  * or NULL when none does.
  *
  * An entry covers the address when both have the same address family and
  * their first ap_mask bits are equal.  Any family other than AF_INET is
  * read through the sockaddr_in6 layout.
  */
 const struct auth_portal *
 auth_portal_find(const struct auth_group *ag, const struct sockaddr_storage *ss)
 {
 	const struct auth_portal *ap;
 	const uint8_t *peer, *rule;
 	int idx, whole, rest;
 	uint8_t bmask;
 
 	TAILQ_FOREACH(ap, &ag->ag_portals, ap_next) {
 		if (ap->ap_sa.ss_family != ss->ss_family)
 			continue;
 
 		/* Locate the raw address bytes of both sides. */
 		if (ss->ss_family == AF_INET) {
 			peer = (const uint8_t *)
 			    &((const struct sockaddr_in *)ss)->sin_addr;
 			rule = (const uint8_t *)
 			    &((const struct sockaddr_in *)&ap->ap_sa)->sin_addr;
 		} else {
 			peer = (const uint8_t *)
 			    &((const struct sockaddr_in6 *)ss)->sin6_addr;
 			rule = (const uint8_t *)
 			    &((const struct sockaddr_in6 *)&ap->ap_sa)->sin6_addr;
 		}
 
 		/* Compare the bytes fully covered by the prefix... */
 		whole = ap->ap_mask / 8;
 		rest = ap->ap_mask % 8;
 		for (idx = 0; idx < whole; idx++) {
 			if (peer[idx] != rule[idx])
 				break;
 		}
 		if (idx < whole)
 			continue;
 
 		/* ...then the leading bits of the partially covered byte. */
 		if (rest) {
 			bmask = 0xff << (8 - rest);
 			if ((peer[idx] & bmask) != (rule[idx] & bmask))
 				continue;
 		}
 
 		return (ap);
 	}
 
 	return (NULL);
 }
 
 /*
  * Check address "sa" against the initiator portals listed in "ag".
  *
  * Returns 0 when the group lists no portals at all or when some portal
  * covers the address, and 1 when portals are listed but none covers it.
  */
 int
 auth_portal_check(const struct auth_group *ag, const struct sockaddr_storage *sa)
 {
 
 	if (auth_portal_defined(ag) && auth_portal_find(ag, sa) == NULL)
 		return (1);
 
 	return (0);
 }
 
 /*
  * Create an auth group in "conf" and append it to the configuration.
  *
  * "name" may be NULL for an unnamed group.  A named group must be unique:
  * when one with the same name already exists a warning is logged and NULL
  * is returned.
  */
 struct auth_group *
 auth_group_new(struct conf *conf, const char *name)
 {
 	struct auth_group *group;
 
 	if (name != NULL && auth_group_find(conf, name) != NULL) {
 		log_warnx("duplicated auth-group \"%s\"", name);
 		return (NULL);
 	}
 
 	if ((group = calloc(1, sizeof(*group))) == NULL)
 		log_err(1, "calloc");
 
 	if (name != NULL)
 		group->ag_name = checked_strdup(name);
 	group->ag_conf = conf;
 	TAILQ_INIT(&group->ag_auths);
 	TAILQ_INIT(&group->ag_names);
 	TAILQ_INIT(&group->ag_portals);
 	TAILQ_INSERT_TAIL(&conf->conf_auth_groups, group, ag_next);
 
 	return (group);
 }
 
 void
 auth_group_delete(struct auth_group *ag)
 {
 	struct auth *auth, *auth_tmp;
 	struct auth_name *auth_name, *auth_name_tmp;
 	struct auth_portal *auth_portal, *auth_portal_tmp;
 
 	TAILQ_REMOVE(&ag->ag_conf->conf_auth_groups, ag, ag_next);
 
 	TAILQ_FOREACH_SAFE(auth, &ag->ag_auths, a_next, auth_tmp)
 		auth_delete(auth);
 	TAILQ_FOREACH_SAFE(auth_name, &ag->ag_names, an_next, auth_name_tmp)
 		auth_name_delete(auth_name);
 	TAILQ_FOREACH_SAFE(auth_portal, &ag->ag_portals, ap_next,
 	    auth_portal_tmp)
 		auth_portal_delete(auth_portal);
 	free(ag->ag_name);
 	free(ag);
 }
 
 /*
  * Look up a named auth group in "conf".  Unnamed groups never match.
  * Returns the group, or NULL when no group carries that name.
  */
 struct auth_group *
 auth_group_find(const struct conf *conf, const char *name)
 {
 	struct auth_group *cur;
 
 	TAILQ_FOREACH(cur, &conf->conf_auth_groups, ag_next) {
 		if (cur->ag_name == NULL)
 			continue;
 		if (strcmp(cur->ag_name, name) == 0)
 			return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Set the authentication type of "ag" from its configuration keyword:
  * "none", "deny", "chap" or "chap-mutual".
  *
  * Returns 0 on success.  Returns 1, after logging a warning, when the
  * keyword is not recognised or when the group already has a different type.
  */
 int
 auth_group_set_type(struct auth_group *ag, const char *str)
 {
 	const struct {
 		const char	*keyword;
 		int		 type;
 	} known[] = {
 		{ "none",		AG_TYPE_NO_AUTHENTICATION },
 		{ "deny",		AG_TYPE_DENY },
 		{ "chap",		AG_TYPE_CHAP },
 		{ "chap-mutual",	AG_TYPE_CHAP_MUTUAL },
 	};
 	const size_t nknown = sizeof(known) / sizeof(known[0]);
 	size_t idx;
 	int type;
 
 	for (idx = 0; idx < nknown; idx++) {
 		if (strcmp(str, known[idx].keyword) == 0)
 			break;
 	}
 	if (idx == nknown) {
 		if (ag->ag_name == NULL)
 			log_warnx("invalid auth-type \"%s\" for target "
 			    "\"%s\"", str, ag->ag_target->t_name);
 		else
 			log_warnx("invalid auth-type \"%s\" for auth-group "
 			    "\"%s\"", str, ag->ag_name);
 		return (1);
 	}
 	type = known[idx].type;
 
 	/* A type that was already chosen may be repeated but not changed. */
 	if (ag->ag_type != AG_TYPE_UNKNOWN && ag->ag_type != type) {
 		if (ag->ag_name == NULL)
 			log_warnx("cannot set auth-type to \"%s\" for target "
 			    "\"%s\"; already has a different type",
 			    str, ag->ag_target->t_name);
 		else
 			log_warnx("cannot set auth-type to \"%s\" for "
 			    "auth-group \"%s\"; already has a different "
 			    "type", str, ag->ag_name);
 		return (1);
 	}
 
 	ag->ag_type = type;
 
 	return (0);
 }
 
 /*
  * Allocate an empty portal and append it to the portal list of "pg".
  */
 static struct portal *
 portal_new(struct portal_group *pg)
 {
 	struct portal *p;
 
 	if ((p = calloc(1, sizeof(*p))) == NULL)
 		log_err(1, "calloc");
 
 	p->p_portal_group = pg;
 	TAILQ_INIT(&p->p_targets);
 	TAILQ_INSERT_TAIL(&pg->pg_portals, p, p_next);
 
 	return (p);
 }
 
 static void
 portal_delete(struct portal *portal)
 {
 
 	TAILQ_REMOVE(&portal->p_portal_group->pg_portals, portal, p_next);
 	if (portal->p_ai != NULL)
 		freeaddrinfo(portal->p_ai);
 	free(portal->p_listen);
 	free(portal);
 }
 
 /*
  * Create a portal group called "name" in "conf" and append it to the
  * configuration.  Names must be unique: when a group with that name already
  * exists a warning is logged and NULL is returned.
  */
 struct portal_group *
 portal_group_new(struct conf *conf, const char *name)
 {
 	struct portal_group *group;
 
 	if (portal_group_find(conf, name) != NULL) {
 		log_warnx("duplicated portal-group \"%s\"", name);
 		return (NULL);
 	}
 
 	if ((group = calloc(1, sizeof(*group))) == NULL)
 		log_err(1, "calloc");
 
 	group->pg_name = checked_strdup(name);
 	group->pg_conf = conf;
 	group->pg_tag = 0;		/* Assigned later in conf_apply(). */
 	TAILQ_INIT(&group->pg_portals);
 	TAILQ_INIT(&group->pg_ports);
 	TAILQ_INSERT_TAIL(&conf->conf_portal_groups, group, pg_next);
 
 	return (group);
 }
 
 void
 portal_group_delete(struct portal_group *pg)
 {
 	struct portal *portal, *tmp;
 	struct port *port, *tport;
 
 	TAILQ_FOREACH_SAFE(port, &pg->pg_ports, p_pgs, tport)
 		port_delete(port);
 	TAILQ_REMOVE(&pg->pg_conf->conf_portal_groups, pg, pg_next);
 
 	TAILQ_FOREACH_SAFE(portal, &pg->pg_portals, p_next, tmp)
 		portal_delete(portal);
 	free(pg->pg_name);
 	free(pg->pg_offload);
 	free(pg->pg_redirection);
 	free(pg);
 }
 
 /*
  * Look up the portal group called "name" in "conf".
  * Returns the group, or NULL when no group carries that name.
  */
 struct portal_group *
 portal_group_find(const struct conf *conf, const char *name)
 {
 	struct portal_group *cur;
 
 	TAILQ_FOREACH(cur, &conf->conf_portal_groups, pg_next) {
 		if (strcmp(cur->pg_name, name) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Split "arg" into an address and a port and resolve the pair with
  * getaddrinfo(3) for a passive stream socket.
  *
  * Accepted forms:
  *   "[addr]" or "[addr]:port"  - bracketed address (IPv6 style);
  *   text with two or more ':'  - taken whole as an address, no port;
  *   "addr" or "addr:port"      - anything else.
  * "def_port" is used whenever the input names no port.
  *
  * "arg" itself is not modified; parsing happens on a private copy.  On
  * success 0 is returned and *ai holds the result, to be released with
  * freeaddrinfo(3).  On any failure (malformed input, allocation failure or
  * resolution error) 1 is returned.
  *
  * Compared with the previous version, the private copy is now released on
  * the "missing ']'" path too, and a failed strdup(3) is reported as an
  * error instead of being dereferenced.
  */
 static int
 parse_addr_port(char *arg, const char *def_port, struct addrinfo **ai)
 {
 	struct addrinfo hints;
 	char *str, *addr, *ch;
 	const char *port;
 	int error, colons = 0;
 
 	str = arg = strdup(arg);
 	if (str == NULL)
 		return (1);
 	if (arg[0] == '[') {
 		/*
 		 * IPv6 address in square brackets, perhaps with port.
 		 */
 		arg++;
 		addr = strsep(&arg, "]");
 		if (arg == NULL) {
 			/* No closing bracket. */
 			free(str);
 			return (1);
 		}
 		if (arg[0] == '\0') {
 			port = def_port;
 		} else if (arg[0] == ':') {
 			port = arg + 1;
 		} else {
 			free(str);
 			return (1);
 		}
 	} else {
 		/*
 		 * Either IPv6 address without brackets - and without
 		 * a port - or IPv4 address.  Just count the colons.
 		 */
 		for (ch = arg; *ch != '\0'; ch++) {
 			if (*ch == ':')
 				colons++;
 		}
 		if (colons > 1) {
 			addr = arg;
 			port = def_port;
 		} else {
 			addr = strsep(&arg, ":");
 			if (arg == NULL)
 				port = def_port;
 			else
 				port = arg;
 		}
 	}
 
 	memset(&hints, 0, sizeof(hints));
 	hints.ai_family = PF_UNSPEC;
 	hints.ai_socktype = SOCK_STREAM;
 	hints.ai_flags = AI_PASSIVE;
 	error = getaddrinfo(addr, port, &hints, ai);
 	/* addr and port point into str, so free it only after the lookup. */
 	free(str);
 	return ((error != 0) ? 1 : 0);
 }
 
 /*
  * Add a listening portal for address "value" to "pg"; "iser" is recorded
  * on the portal.  Port 3260 is used when "value" names none.
  *
  * Returns 0 on success.  When the address cannot be parsed or resolved the
  * portal is removed again, a warning is logged and 1 is returned.
  */
 int
 portal_group_add_listen(struct portal_group *pg, const char *value, bool iser)
 {
 	struct portal *p;
 
 	p = portal_new(pg);
 	p->p_iser = iser;
 	p->p_listen = checked_strdup(value);
 
 	/*
 	 * XXX: getaddrinfo(3) may return multiple addresses; we should turn
 	 *	those into multiple portals.
 	 */
 	if (parse_addr_port(p->p_listen, "3260", &p->p_ai) == 0)
 		return (0);
 
 	log_warnx("invalid listen address %s", p->p_listen);
 	portal_delete(p);
 	return (1);
 }
 
 /*
  * Register iSNS server address "addr" with "conf".  Port 3205 is used when
  * "addr" names none.
  *
  * Returns 0 on success.  When the address cannot be parsed or resolved the
  * entry is removed again, a warning is logged and 1 is returned.
  */
 int
 isns_new(struct conf *conf, const char *addr)
 {
 	struct isns *server;
 
 	if ((server = calloc(1, sizeof(*server))) == NULL)
 		log_err(1, "calloc");
 
 	server->i_conf = conf;
 	TAILQ_INSERT_TAIL(&conf->conf_isns, server, i_next);
 	server->i_addr = checked_strdup(addr);
 
 	/*
 	 * XXX: getaddrinfo(3) may return multiple addresses; we should turn
 	 *	those into multiple servers.
 	 */
 	if (parse_addr_port(server->i_addr, "3205", &server->i_ai) == 0)
 		return (0);
 
 	log_warnx("invalid iSNS address %s", server->i_addr);
 	isns_delete(server);
 	return (1);
 }
 
 void
 isns_delete(struct isns *isns)
 {
 
 	TAILQ_REMOVE(&isns->i_conf->conf_isns, isns, i_next);
 	free(isns->i_addr);
 	if (isns->i_ai != NULL)
 		freeaddrinfo(isns->i_ai);
 	free(isns);
 }
 
 /*
  * Open a socket to the iSNS server using the first entry of its resolved
  * address list.
  *
  * Returns the connected descriptor, or -1 after logging a warning when
  * socket(2) or connect(2) fails.
  */
 static int
 isns_do_connect(struct isns *isns)
 {
 	int fd;
 
 	fd = socket(isns->i_ai->ai_family, isns->i_ai->ai_socktype,
 	    isns->i_ai->ai_protocol);
 	if (fd < 0) {
 		log_warn("socket(2) failed for %s", isns->i_addr);
 		return (-1);
 	}
 
 	if (connect(fd, isns->i_ai->ai_addr, isns->i_ai->ai_addrlen) == 0)
 		return (fd);
 
 	log_warn("connect(2) failed for %s", isns->i_addr);
 	close(fd);
 	return (-1);
 }
 
 /*
  * Send a registration request (ISNS_FUNC_DEVATTRREG) over the connected
  * socket "s" and wait for the reply.
  *
  * The request is keyed by the name of the first configured target, so the
  * target list must not be empty.  It carries "hostname", the registration
  * period, every portal of each portal group that is not marked unassigned,
  * and every target with its alias (when set) and the tag and portals of
  * each portal group it is exported through.  The numeric tags passed to
  * the isns_req_add_*() calls are presumably iSNS attribute tags --
  * NOTE(review): confirm against the iSNS specification.
  *
  * Returns a negative value when sending or receiving fails or the server
  * answers with a non-zero status; a warning is logged in each case.
  *
  * The status is unsigned and is now printed with %u rather than %d.
  */
 static int
 isns_do_register(struct isns *isns, int s, const char *hostname)
 {
 	struct conf *conf = isns->i_conf;
 	struct target *target;
 	struct portal *portal;
 	struct portal_group *pg;
 	struct port *port;
 	struct isns_req *req;
 	int res = 0;
 	uint32_t error;
 
 	req = isns_req_create(ISNS_FUNC_DEVATTRREG, ISNS_FLAG_CLIENT);
 	isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name);
 	isns_req_add_delim(req);
 	isns_req_add_str(req, 1, hostname);
 	isns_req_add_32(req, 2, 2); /* 2 -- iSCSI */
 	isns_req_add_32(req, 6, conf->conf_isns_period);
 	TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) {
 		if (pg->pg_unassigned)
 			continue;
 		TAILQ_FOREACH(portal, &pg->pg_portals, p_next) {
 			isns_req_add_addr(req, 16, portal->p_ai);
 			isns_req_add_port(req, 17, portal->p_ai);
 		}
 	}
 	TAILQ_FOREACH(target, &conf->conf_targets, t_next) {
 		isns_req_add_str(req, 32, target->t_name);
 		isns_req_add_32(req, 33, 1); /* 1 -- Target*/
 		if (target->t_alias != NULL)
 			isns_req_add_str(req, 34, target->t_alias);
 		TAILQ_FOREACH(port, &target->t_ports, p_ts) {
 			/* Ports without a portal group have nothing to add. */
 			if ((pg = port->p_portal_group) == NULL)
 				continue;
 			isns_req_add_32(req, 51, pg->pg_tag);
 			TAILQ_FOREACH(portal, &pg->pg_portals, p_next) {
 				isns_req_add_addr(req, 49, portal->p_ai);
 				isns_req_add_port(req, 50, portal->p_ai);
 			}
 		}
 	}
 	res = isns_req_send(s, req);
 	if (res < 0) {
 		log_warn("send(2) failed for %s", isns->i_addr);
 		goto quit;
 	}
 	res = isns_req_receive(s, req);
 	if (res < 0) {
 		log_warn("receive(2) failed for %s", isns->i_addr);
 		goto quit;
 	}
 	error = isns_req_get_status(req);
 	if (error != 0) {
 		log_warnx("iSNS register error %u for %s",
 		    (unsigned int)error, isns->i_addr);
 		res = -1;
 	}
 quit:
 	isns_req_free(req);
 	return (res);
 }
 
 static int
 isns_do_check(struct isns *isns, int s, const char *hostname)
 {
 	struct conf *conf = isns->i_conf;
 	struct isns_req *req;
 	int res = 0;
 	uint32_t error;
 
 	req = isns_req_create(ISNS_FUNC_DEVATTRQRY, ISNS_FLAG_CLIENT);
 	isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name);
 	isns_req_add_str(req, 1, hostname);
 	isns_req_add_delim(req);
 	isns_req_add(req, 2, 0, NULL);
 	res = isns_req_send(s, req);
 	if (res < 0) {
 		log_warn("send(2) failed for %s", isns->i_addr);
 		goto quit;
 	}
 	res = isns_req_receive(s, req);
 	if (res < 0) {
 		log_warn("receive(2) failed for %s", isns->i_addr);
 		goto quit;
 	}
 	error = isns_req_get_status(req);
 	if (error != 0) {
 		log_warnx("iSNS check error %d for %s", error, isns->i_addr);
 		res = -1;
 	}
 quit:
 	isns_req_free(req);
 	return (res);
 }
 
 static int
 isns_do_deregister(struct isns *isns, int s, const char *hostname)
 {
 	struct conf *conf = isns->i_conf;
 	struct isns_req *req;
 	int res = 0;
 	uint32_t error;
 
 	req = isns_req_create(ISNS_FUNC_DEVDEREG, ISNS_FLAG_CLIENT);
 	isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name);
 	isns_req_add_delim(req);
 	isns_req_add_str(req, 1, hostname);
 	res = isns_req_send(s, req);
 	if (res < 0) {
 		log_warn("send(2) failed for %s", isns->i_addr);
 		goto quit;
 	}
 	res = isns_req_receive(s, req);
 	if (res < 0) {
 		log_warn("receive(2) failed for %s", isns->i_addr);
 		goto quit;
 	}
 	error = isns_req_get_status(req);
 	if (error != 0) {
 		log_warnx("iSNS deregister error %d for %s", error, isns->i_addr);
 		res = -1;
 	}
 quit:
 	isns_req_free(req);
 	return (res);
 }
 
 /*
  * (Re-)register this host with iSNS server "isns".
  *
  * Nothing happens when the configuration has no targets or no portal
  * groups.  Otherwise a deregistration is sent first -- on behalf of
  * "oldisns" when it is given and its configuration has targets, else on
  * behalf of "isns" itself -- followed by a fresh registration.  The whole
  * exchange runs under set_timeout(conf_isns_timeout), which is reset with
  * set_timeout(0) afterwards, also when connecting fails.  Results of the
  * individual requests are not propagated.
  */
 void
 isns_register(struct isns *isns, struct isns *oldisns)
 {
 	struct conf *conf = isns->i_conf;
 	char hostname[256];
 	int fd;
 
 	if (TAILQ_EMPTY(&conf->conf_targets) ||
 	    TAILQ_EMPTY(&conf->conf_portal_groups))
 		return;
 
 	set_timeout(conf->conf_isns_timeout, false);
 	fd = isns_do_connect(isns);
 	if (fd >= 0) {
 		gethostname(hostname, sizeof(hostname));
 
 		/* Without a usable old entry, deregister ourselves. */
 		if (oldisns == NULL ||
 		    TAILQ_EMPTY(&oldisns->i_conf->conf_targets))
 			oldisns = isns;
 		isns_do_deregister(oldisns, fd, hostname);
 		isns_do_register(isns, fd, hostname);
 		close(fd);
 	}
 	set_timeout(0, false);
 }
 
 /*
  * Verify this host's registration with iSNS server "isns" and redo it
  * (deregister, then register) when the check request fails.
  *
  * Nothing happens when the configuration has no targets or no portal
  * groups.  The exchange runs under set_timeout(conf_isns_timeout), which is
  * reset with set_timeout(0) afterwards, also when connecting fails.
  */
 void
 isns_check(struct isns *isns)
 {
 	struct conf *conf = isns->i_conf;
 	char hostname[256];
 	int fd;
 
 	if (TAILQ_EMPTY(&conf->conf_targets) ||
 	    TAILQ_EMPTY(&conf->conf_portal_groups))
 		return;
 
 	set_timeout(conf->conf_isns_timeout, false);
 	fd = isns_do_connect(isns);
 	if (fd >= 0) {
 		gethostname(hostname, sizeof(hostname));
 
 		if (isns_do_check(isns, fd, hostname) < 0) {
 			isns_do_deregister(isns, fd, hostname);
 			isns_do_register(isns, fd, hostname);
 		}
 		close(fd);
 	}
 	set_timeout(0, false);
 }
 
 /*
  * Deregister this host from iSNS server "isns".
  *
  * Nothing happens when the configuration has no targets or no portal
  * groups.  The exchange runs under set_timeout(conf_isns_timeout), which is
  * reset with set_timeout(0) afterwards.
  *
  * The timeout is now reset when connecting fails as well, matching
  * isns_register() and isns_check(); previously that path returned with the
  * timeout still set.
  */
 void
 isns_deregister(struct isns *isns)
 {
 	struct conf *conf = isns->i_conf;
 	int s;
 	char hostname[256];
 
 	if (TAILQ_EMPTY(&conf->conf_targets) ||
 	    TAILQ_EMPTY(&conf->conf_portal_groups))
 		return;
 	set_timeout(conf->conf_isns_timeout, false);
 	s = isns_do_connect(isns);
 	if (s < 0) {
 		set_timeout(0, false);
 		return;
 	}
 	gethostname(hostname, sizeof(hostname));
 
 	isns_do_deregister(isns, s, hostname);
 	close(s);
 	set_timeout(0, false);
 }
 
 /*
  * Set the discovery-filter of a portal group from its configuration
  * keyword.
  *
  * Returns 0 on success.  Returns 1, after logging a warning, when the
  * keyword is not recognized or when the group already has a different
  * filter assigned.
  */
 int
 portal_group_set_filter(struct portal_group *pg, const char *str)
 {
 	static const struct {
 		const char	*keyword;
 		int		 value;
 	} known[] = {
 		{ "none",		PG_FILTER_NONE },
 		{ "portal",		PG_FILTER_PORTAL },
 		{ "portal-name",	PG_FILTER_PORTAL_NAME },
 		{ "portal-name-auth",	PG_FILTER_PORTAL_NAME_AUTH },
 	};
 	const size_t nknown = sizeof(known) / sizeof(known[0]);
 	size_t k;
 
 	for (k = 0; k < nknown; k++) {
 		if (strcmp(str, known[k].keyword) == 0)
 			break;
 	}
 	if (k == nknown) {
 		log_warnx("invalid discovery-filter \"%s\" for portal-group "
 		    "\"%s\"; valid values are \"none\", \"portal\", "
 		    "\"portal-name\", and \"portal-name-auth\"",
 		    str, pg->pg_name);
 		return (1);
 	}
 
 	/* Re-assigning the same value is fine; changing it is not. */
 	if (pg->pg_discovery_filter != PG_FILTER_UNKNOWN &&
 	    pg->pg_discovery_filter != known[k].value) {
 		log_warnx("cannot set discovery-filter to \"%s\" for "
 		    "portal-group \"%s\"; already has a different "
 		    "value", str, pg->pg_name);
 		return (1);
 	}
 
 	pg->pg_discovery_filter = known[k].value;
 	return (0);
 }
 
 /*
  * Record the offload setting of a portal group.  The string is copied.
  *
  * Returns 0 on success, or 1 (after logging a warning) when an offload
  * value has already been set for the group.
  */
 int
 portal_group_set_offload(struct portal_group *pg, const char *offload)
 {
 
 	if (pg->pg_offload == NULL) {
 		pg->pg_offload = checked_strdup(offload);
 		return (0);
 	}
 
 	log_warnx("cannot set offload to \"%s\" for "
 	    "portal-group \"%s\"; already defined",
 	    offload, pg->pg_name);
 	return (1);
 }
 
 /*
  * Record the redirection address of a portal group.  The string is
  * copied.
  *
  * Returns 0 on success, or 1 (after logging a warning) when a
  * redirection has already been set for the group.
  */
 int
 portal_group_set_redirection(struct portal_group *pg, const char *addr)
 {
 
 	if (pg->pg_redirection == NULL) {
 		pg->pg_redirection = checked_strdup(addr);
 		return (0);
 	}
 
 	log_warnx("cannot set redirection to \"%s\" for "
 	    "portal-group \"%s\"; already defined",
 	    addr, pg->pg_name);
 	return (1);
 }
 
 /*
  * Return true when "ch" is one of 0-9, a-f or A-F.
  */
 static bool
 valid_hex(const char ch)
 {
 
 	if (ch >= '0' && ch <= '9')
 		return (true);
 	if (ch >= 'a' && ch <= 'f')
 		return (true);
 	if (ch >= 'A' && ch <= 'F')
 		return (true);
 	return (false);
 }
 
 /*
  * Sanity-check an iSCSI target name.
  *
  * Returns false only when the name is too long (MAX_NAME_LEN characters
  * or more).  Every other problem - unknown prefix, unexpected
  * characters, wrong number of hexadecimal digits - is merely logged as
  * a warning and the function still returns true, so that the
  * administrator's choice wins if our checks are too strict.
  */
 bool
 valid_iscsi_name(const char *name)
 {
 	int i;
 
 	if (strlen(name) >= MAX_NAME_LEN) {
 		log_warnx("overlong name for target \"%s\"; max length allowed "
 		    "by iSCSI specification is %d characters",
 		    name, MAX_NAME_LEN);
 		return (false);
 	}
 
 	/*
 	 * In the cases below, we don't return an error, just in case the admin
 	 * was right, and we're wrong.
 	 */
 	if (strncasecmp(name, "iqn.", strlen("iqn.")) == 0) {
 		for (i = strlen("iqn."); name[i] != '\0'; i++) {
 			/*
 			 * XXX: We should verify UTF-8 normalisation, as defined
 			 *      by 3.2.6.2: iSCSI Name Encoding.
 			 */
 			/*
 			 * The <ctype.h> functions require a value
 			 * representable as unsigned char; a plain char may
 			 * be negative for bytes above 0x7f.
 			 */
 			if (isalnum((unsigned char)name[i]))
 				continue;
 			if (name[i] == '-' || name[i] == '.' || name[i] == ':')
 				continue;
 			log_warnx("invalid character \"%c\" in target name "
 			    "\"%s\"; allowed characters are letters, digits, "
 			    "'-', '.', and ':'", name[i], name);
 			break;
 		}
 		/*
 		 * XXX: Check more stuff: valid date and a valid reversed domain.
 		 */
 	} else if (strncasecmp(name, "eui.", strlen("eui.")) == 0) {
 		if (strlen(name) != strlen("eui.") + 16)
 			log_warnx("invalid target name \"%s\"; the \"eui.\" "
 			    "should be followed by exactly 16 hexadecimal "
 			    "digits", name);
 		for (i = strlen("eui."); name[i] != '\0'; i++) {
 			if (!valid_hex(name[i])) {
 				log_warnx("invalid character \"%c\" in target "
 				    "name \"%s\"; allowed characters are 0-9 "
 				    "and A-F", name[i], name);
 				break;
 			}
 		}
 	} else if (strncasecmp(name, "naa.", strlen("naa.")) == 0) {
 		if (strlen(name) > strlen("naa.") + 32)
 			log_warnx("invalid target name \"%s\"; the \"naa.\" "
 			    "should be followed by at most 32 hexadecimal "
 			    "digits", name);
 		for (i = strlen("naa."); name[i] != '\0'; i++) {
 			if (!valid_hex(name[i])) {
 				log_warnx("invalid character \"%c\" in target "
 				    "name \"%s\"; allowed characters are 0-9 "
 				    "and A-F", name[i], name);
 				break;
 			}
 		}
 	} else {
 		log_warnx("invalid target name \"%s\"; should start with "
 		    "either \"iqn.\", \"eui.\", or \"naa.\"",
 		    name);
 	}
 	return (true);
 }
 
 /*
  * Allocate a physical-port record, give it a private copy of "name",
  * and append it to conf->conf_pports.  An allocation failure is
  * reported through log_err(1, ...).
  */
 struct pport *
 pport_new(struct conf *conf, const char *name, uint32_t ctl_port)
 {
 	struct pport *fresh;
 
 	fresh = calloc(1, sizeof(*fresh));
 	if (fresh == NULL)
 		log_err(1, "calloc");
 
 	TAILQ_INIT(&fresh->pp_ports);
 	fresh->pp_ctl_port = ctl_port;
 	fresh->pp_name = checked_strdup(name);
 	fresh->pp_conf = conf;
 
 	TAILQ_INSERT_TAIL(&conf->conf_pports, fresh, pp_next);
 	return (fresh);
 }
 
 /*
  * Look up a physical port by name, ignoring case.  Returns NULL when
  * the configuration has no such port.
  */
 struct pport *
 pport_find(const struct conf *conf, const char *name)
 {
 	struct pport *cur;
 
 	for (cur = TAILQ_FIRST(&conf->conf_pports); cur != NULL;
 	    cur = TAILQ_NEXT(cur, pp_next)) {
 		if (strcasecmp(cur->pp_name, name) != 0)
 			continue;
 		return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Create, in "conf", a new physical port with the same name and CTL
  * port number as "pp".  The list of attached ports is not copied.
  */
 struct pport *
 pport_copy(struct pport *pp, struct conf *conf)
 {
 
 	return (pport_new(conf, pp->pp_name, pp->pp_ctl_port));
 }
 
 void
 pport_delete(struct pport *pp)
 {
 	struct port *port, *tport;
 
 	TAILQ_FOREACH_SAFE(port, &pp->pp_ports, p_ts, tport)
 		port_delete(port);
 	TAILQ_REMOVE(&pp->pp_conf->conf_pports, pp, pp_next);
 	free(pp->pp_name);
 	free(pp);
 }
 
 /*
  * Create the port that binds "target" to portal group "pg".
  *
  * The port is named "<portal-group>-<target>".  Returns NULL, after
  * logging a warning, when a port of that name already exists in "conf".
  * Otherwise the new port is linked into the configuration, the target
  * and the portal group, and returned.
  */
 struct port *
 port_new(struct conf *conf, struct target *target, struct portal_group *pg)
 {
 	struct port *fresh;
 	char *label;
 
 	if (asprintf(&label, "%s-%s", pg->pg_name, target->t_name) <= 0)
 		log_err(1, "asprintf");
 
 	if (port_find(conf, label) != NULL) {
 		log_warnx("duplicate port \"%s\"", label);
 		free(label);
 		return (NULL);
 	}
 
 	fresh = calloc(1, sizeof(*fresh));
 	if (fresh == NULL)
 		log_err(1, "calloc");
 
 	/* Ownership of "label" passes to the port. */
 	fresh->p_name = label;
 	fresh->p_conf = conf;
 	fresh->p_target = target;
 	fresh->p_portal_group = pg;
 
 	TAILQ_INSERT_TAIL(&conf->conf_ports, fresh, p_next);
 	TAILQ_INSERT_TAIL(&target->t_ports, fresh, p_ts);
 	TAILQ_INSERT_TAIL(&pg->pg_ports, fresh, p_pgs);
 	return (fresh);
 }
 
 /*
  * Create the port that binds "target" to physical port "pp".
  *
  * The port is named "<physical-port>-<target>".  Returns NULL, after
  * logging a warning, when a port of that name already exists in "conf".
  * Otherwise the new port is linked into the configuration, the target
  * and the physical port, and returned.
  */
 struct port *
 port_new_pp(struct conf *conf, struct target *target, struct pport *pp)
 {
 	struct port *fresh;
 	char *label;
 
 	if (asprintf(&label, "%s-%s", pp->pp_name, target->t_name) <= 0)
 		log_err(1, "asprintf");
 
 	if (port_find(conf, label) != NULL) {
 		log_warnx("duplicate port \"%s\"", label);
 		free(label);
 		return (NULL);
 	}
 
 	fresh = calloc(1, sizeof(*fresh));
 	if (fresh == NULL)
 		log_err(1, "calloc");
 
 	/* Ownership of "label" passes to the port. */
 	fresh->p_name = label;
 	fresh->p_conf = conf;
 	fresh->p_target = target;
 	fresh->p_pport = pp;
 
 	TAILQ_INSERT_TAIL(&conf->conf_ports, fresh, p_next);
 	TAILQ_INSERT_TAIL(&target->t_ports, fresh, p_ts);
 	TAILQ_INSERT_TAIL(&pp->pp_ports, fresh, p_pps);
 	return (fresh);
 }
 
 /*
  * Look up a port by name, ignoring case.  Returns NULL when the
  * configuration has no such port.
  */
 struct port *
 port_find(const struct conf *conf, const char *name)
 {
 	struct port *cur;
 
 	for (cur = TAILQ_FIRST(&conf->conf_ports); cur != NULL;
 	    cur = TAILQ_NEXT(cur, p_next)) {
 		if (strcasecmp(cur->p_name, name) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Find, among the ports of portal group "pg", the one whose target is
  * named "target" (case-insensitive).  Returns NULL when there is none.
  */
 struct port *
 port_find_in_pg(const struct portal_group *pg, const char *target)
 {
 	struct port *cur;
 
 	for (cur = TAILQ_FIRST(&pg->pg_ports); cur != NULL;
 	    cur = TAILQ_NEXT(cur, p_pgs)) {
 		if (strcasecmp(cur->p_target->t_name, target) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 void
 port_delete(struct port *port)
 {
 
 	if (port->p_portal_group)
 		TAILQ_REMOVE(&port->p_portal_group->pg_ports, port, p_pgs);
 	if (port->p_pport)
 		TAILQ_REMOVE(&port->p_pport->pp_ports, port, p_pps);
 	if (port->p_target)
 		TAILQ_REMOVE(&port->p_target->t_ports, port, p_ts);
 	TAILQ_REMOVE(&port->p_conf->conf_ports, port, p_next);
 	free(port->p_name);
 	free(port);
 }
 
 /*
  * Create a target called "name" and append it to conf->conf_targets.
  *
  * Returns NULL, after logging a warning, when a target of that name
  * already exists or when valid_iscsi_name() rejects the name.  The
  * stored name is a lowercased private copy of "name".
  */
 struct target *
 target_new(struct conf *conf, const char *name)
 {
 	struct target *targ;
 	int i, len;
 
 	targ = target_find(conf, name);
 	if (targ != NULL) {
 		log_warnx("duplicated target \"%s\"", name);
 		return (NULL);
 	}
 	if (valid_iscsi_name(name) == false) {
 		log_warnx("target name \"%s\" is invalid", name);
 		return (NULL);
 	}
 	targ = calloc(1, sizeof(*targ));
 	if (targ == NULL)
 		log_err(1, "calloc");
 	targ->t_name = checked_strdup(name);
 
 	/*
 	 * RFC 3722 requires us to normalize the name to lowercase.
 	 *
 	 * tolower() needs a value representable as unsigned char; a plain
 	 * char may be negative for bytes above 0x7f, hence the cast.
 	 */
 	len = strlen(name);
 	for (i = 0; i < len; i++)
 		targ->t_name[i] = tolower((unsigned char)targ->t_name[i]);
 
 	targ->t_conf = conf;
 	TAILQ_INIT(&targ->t_ports);
 	TAILQ_INSERT_TAIL(&conf->conf_targets, targ, t_next);
 
 	return (targ);
 }
 
 void
 target_delete(struct target *targ)
 {
 	struct port *port, *tport;
 
 	TAILQ_FOREACH_SAFE(port, &targ->t_ports, p_ts, tport)
 		port_delete(port);
 	TAILQ_REMOVE(&targ->t_conf->conf_targets, targ, t_next);
 
 	free(targ->t_name);
 	free(targ->t_redirection);
 	free(targ);
 }
 
 /*
  * Look up a target by name, ignoring case.  Returns NULL when the
  * configuration has no such target.
  */
 struct target *
 target_find(struct conf *conf, const char *name)
 {
 	struct target *cur;
 
 	for (cur = TAILQ_FIRST(&conf->conf_targets); cur != NULL;
 	    cur = TAILQ_NEXT(cur, t_next)) {
 		if (strcasecmp(cur->t_name, name) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Record the redirection address of a target.  The string is copied.
  *
  * Returns 0 on success, or 1 (after logging a warning) when a
  * redirection has already been set for the target.
  */
 int
 target_set_redirection(struct target *target, const char *addr)
 {
 
 	if (target->t_redirection == NULL) {
 		target->t_redirection = checked_strdup(addr);
 		return (0);
 	}
 
 	log_warnx("cannot set redirection to \"%s\" for "
 	    "target \"%s\"; already defined",
 	    addr, target->t_name);
 	return (1);
 }
 
 /*
  * Create a LUN called "name" and append it to conf->conf_luns.
  *
  * Returns NULL, after logging a warning, when a LUN of that name
  * already exists.  The new LUN gets a private copy of the name and an
  * empty option list.
  */
 struct lun *
 lun_new(struct conf *conf, const char *name)
 {
 	struct lun *fresh;
 
 	if (lun_find(conf, name) != NULL) {
 		log_warnx("duplicated lun \"%s\"", name);
 		return (NULL);
 	}
 
 	fresh = calloc(1, sizeof(*fresh));
 	if (fresh == NULL)
 		log_err(1, "calloc");
 
 	TAILQ_INIT(&fresh->l_options);
 	fresh->l_name = checked_strdup(name);
 	fresh->l_conf = conf;
 	TAILQ_INSERT_TAIL(&conf->conf_luns, fresh, l_next);
 
 	return (fresh);
 }
 
 void
 lun_delete(struct lun *lun)
 {
 	struct target *targ;
 	struct lun_option *lo, *tmp;
 	int i;
 
 	TAILQ_FOREACH(targ, &lun->l_conf->conf_targets, t_next) {
 		for (i = 0; i < MAX_LUNS; i++) {
 			if (targ->t_luns[i] == lun)
 				targ->t_luns[i] = NULL;
 		}
 	}
 	TAILQ_REMOVE(&lun->l_conf->conf_luns, lun, l_next);
 
 	TAILQ_FOREACH_SAFE(lo, &lun->l_options, lo_next, tmp)
 		lun_option_delete(lo);
 	free(lun->l_name);
 	free(lun->l_backend);
 	free(lun->l_device_id);
 	free(lun->l_path);
 	free(lun->l_scsiname);
 	free(lun->l_serial);
 	free(lun);
 }
 
 /*
  * Look up a LUN by name (exact, case-sensitive match).  Returns NULL
  * when the configuration has no such LUN.
  */
 struct lun *
 lun_find(const struct conf *conf, const char *name)
 {
 	struct lun *cur;
 
 	for (cur = TAILQ_FIRST(&conf->conf_luns); cur != NULL;
 	    cur = TAILQ_NEXT(cur, l_next)) {
 		if (strcmp(cur->l_name, name) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Replace the backend name of a LUN with a copy of "value"; the
  * previous string, if any, is freed.
  */
 void
 lun_set_backend(struct lun *lun, const char *value)
 {
 	char **slot = &lun->l_backend;
 
 	free(*slot);
 	*slot = checked_strdup(value);
 }
 
 /*
  * Store "value" as the block size of the LUN.  No validation is done
  * here.
  */
 void
 lun_set_blocksize(struct lun *lun, size_t value)
 {
 
 	lun->l_blocksize = value;
 }
 
 /*
  * Replace the device-id of a LUN with a copy of "value"; the previous
  * string, if any, is freed.
  */
 void
 lun_set_device_id(struct lun *lun, const char *value)
 {
 	char **slot = &lun->l_device_id;
 
 	free(*slot);
 	*slot = checked_strdup(value);
 }
 
 /*
  * Replace the path of a LUN with a copy of "value"; the previous
  * string, if any, is freed.
  */
 void
 lun_set_path(struct lun *lun, const char *value)
 {
 	char **slot = &lun->l_path;
 
 	free(*slot);
 	*slot = checked_strdup(value);
 }
 
 /*
  * Replace the SCSI name of a LUN with a copy of "value"; the previous
  * string, if any, is freed.
  */
 void
 lun_set_scsiname(struct lun *lun, const char *value)
 {
 	char **slot = &lun->l_scsiname;
 
 	free(*slot);
 	*slot = checked_strdup(value);
 }
 
 /*
  * Replace the serial number of a LUN with a copy of "value"; the
  * previous string, if any, is freed.
  */
 void
 lun_set_serial(struct lun *lun, const char *value)
 {
 	char **slot = &lun->l_serial;
 
 	free(*slot);
 	*slot = checked_strdup(value);
 }
 
 /*
  * Store "value" as the size of the LUN.  No validation is done here.
  */
 void
 lun_set_size(struct lun *lun, size_t value)
 {
 
 	lun->l_size = value;
 }
 
 /*
  * Store "value" as the CTL LUN number associated with the LUN.
  */
 void
 lun_set_ctl_lun(struct lun *lun, uint32_t value)
 {
 
 	lun->l_ctl_lun = value;
 }
 
 /*
  * Attach a new name/value option to a LUN.  Both strings are copied.
  *
  * Returns NULL, after logging a warning, when the LUN already has an
  * option of that name.
  */
 struct lun_option *
 lun_option_new(struct lun *lun, const char *name, const char *value)
 {
 	struct lun_option *fresh;
 
 	if (lun_option_find(lun, name) != NULL) {
 		log_warnx("duplicated lun option \"%s\" for lun \"%s\"",
 		    name, lun->l_name);
 		return (NULL);
 	}
 
 	fresh = calloc(1, sizeof(*fresh));
 	if (fresh == NULL)
 		log_err(1, "calloc");
 
 	fresh->lo_name = checked_strdup(name);
 	fresh->lo_value = checked_strdup(value);
 	fresh->lo_lun = lun;
 	TAILQ_INSERT_TAIL(&lun->l_options, fresh, lo_next);
 
 	return (fresh);
 }
 
 void
 lun_option_delete(struct lun_option *lo)
 {
 
 	TAILQ_REMOVE(&lo->lo_lun->l_options, lo, lo_next);
 
 	free(lo->lo_name);
 	free(lo->lo_value);
 	free(lo);
 }
 
 /*
  * Look up an option of a LUN by name (exact, case-sensitive match).
  * Returns NULL when the LUN has no such option.
  */
 struct lun_option *
 lun_option_find(const struct lun *lun, const char *name)
 {
 	struct lun_option *cur;
 
 	for (cur = TAILQ_FIRST(&lun->l_options); cur != NULL;
 	    cur = TAILQ_NEXT(cur, lo_next)) {
 		if (strcmp(cur->lo_name, name) != 0)
 			continue;
 		return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Replace the value of an option with a copy of "value"; the previous
  * string is freed.
  */
 void
 lun_option_set(struct lun_option *lo, const char *value)
 {
 	char **slot = &lo->lo_value;
 
 	free(*slot);
 	*slot = checked_strdup(value);
 }
 
 /*
  * Allocate the per-connection state for a freshly accepted socket.
  *
  * Records the portal, the socket, a copy of the peer's textual address
  * and a copy of its socket address (client_sa->sa_len bytes), then
  * fills in the negotiable parameters with their defaults.
  */
 static struct connection *
 connection_new(struct portal *portal, int fd, const char *host,
     const struct sockaddr *client_sa)
 {
 	struct connection *c;
 
 	c = calloc(1, sizeof(*c));
 	if (c == NULL)
 		log_err(1, "calloc");
 
 	memcpy(&c->conn_initiator_sa, client_sa, client_sa->sa_len);
 	c->conn_initiator_addr = checked_strdup(host);
 	c->conn_socket = fd;
 	c->conn_portal = portal;
 
 	/*
 	 * Default values, from RFC 3720, section 12.
 	 */
 	c->conn_immediate_data = true;
 	c->conn_max_burst_length = 262144;
 	c->conn_max_data_segment_length = 8192;
 
 	return (c);
 }
 
 #if 0
 /*
  * Debugging aid, compiled out by the surrounding "#if 0": dump the
  * auth-groups, portal-groups, LUNs and targets of a configuration to
  * stderr in a layout resembling the configuration file.
  *
  * NOTE(review): since this is never compiled, the field and linkage
  * names used below may be stale - verify them against the structure
  * definitions before enabling it.
  */
 static void
 conf_print(struct conf *conf)
 {
 	struct auth_group *ag;
 	struct auth *auth;
 	struct auth_name *auth_name;
 	struct auth_portal *auth_portal;
 	struct portal_group *pg;
 	struct portal *portal;
 	struct target *targ;
 	struct lun *lun;
 	struct lun_option *lo;
 
 	TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) {
 		fprintf(stderr, "auth-group %s {\n", ag->ag_name);
 		TAILQ_FOREACH(auth, &ag->ag_auths, a_next)
 			fprintf(stderr, "\t chap-mutual %s %s %s %s\n",
 			    auth->a_user, auth->a_secret,
 			    auth->a_mutual_user, auth->a_mutual_secret);
 		TAILQ_FOREACH(auth_name, &ag->ag_names, an_next)
 			fprintf(stderr, "\t initiator-name %s\n",
 			    auth_name->an_initator_name);
 		TAILQ_FOREACH(auth_portal, &ag->ag_portals, an_next)
 			fprintf(stderr, "\t initiator-portal %s\n",
 			    auth_portal->an_initator_portal);
 		fprintf(stderr, "}\n");
 	}
 	TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) {
 		fprintf(stderr, "portal-group %s {\n", pg->pg_name);
 		TAILQ_FOREACH(portal, &pg->pg_portals, p_next)
 			fprintf(stderr, "\t listen %s\n", portal->p_listen);
 		fprintf(stderr, "}\n");
 	}
 	TAILQ_FOREACH(lun, &conf->conf_luns, l_next) {
 		fprintf(stderr, "\tlun %s {\n", lun->l_name);
 		fprintf(stderr, "\t\tpath %s\n", lun->l_path);
 		TAILQ_FOREACH(lo, &lun->l_options, lo_next)
 			fprintf(stderr, "\t\toption %s %s\n",
 			    lo->lo_name, lo->lo_value);
 		fprintf(stderr, "\t}\n");
 	}
 	TAILQ_FOREACH(targ, &conf->conf_targets, t_next) {
 		fprintf(stderr, "target %s {\n", targ->t_name);
 		if (targ->t_alias != NULL)
 			fprintf(stderr, "\t alias %s\n", targ->t_alias);
 		fprintf(stderr, "}\n");
 	}
 }
 #endif
 
 /*
  * Validate a single LUN and fill in its defaults.
  *
  * A missing backend defaults to "block" and a zero blocksize to
  * DEFAULT_BLOCKSIZE.  Returns 1, after logging a warning, when a
  * block-backed LUN has no path, when a ramdisk-backed LUN has no size
  * or has a path, when the blocksize is negative, or when a non-zero
  * size is not a multiple of the blocksize.  A path shared with another
  * LUN is only reported at debug level.  Returns 0 otherwise.
  */
 static int
 conf_verify_lun(struct lun *lun)
 {
 	const struct lun *other;
 	bool is_block, is_ramdisk;
 
 	if (lun->l_backend == NULL)
 		lun_set_backend(lun, "block");
 
 	is_block = (strcmp(lun->l_backend, "block") == 0);
 	is_ramdisk = !is_block && (strcmp(lun->l_backend, "ramdisk") == 0);
 
 	if (is_block && lun->l_path == NULL) {
 		log_warnx("missing path for lun \"%s\"",
 		    lun->l_name);
 		return (1);
 	}
 	if (is_ramdisk && lun->l_size == 0) {
 		log_warnx("missing size for ramdisk-backed lun \"%s\"",
 		    lun->l_name);
 		return (1);
 	}
 	if (is_ramdisk && lun->l_path != NULL) {
 		log_warnx("path must not be specified "
 		    "for ramdisk-backed lun \"%s\"",
 		    lun->l_name);
 		return (1);
 	}
 
 	if (lun->l_blocksize == 0)
 		lun_set_blocksize(lun, DEFAULT_BLOCKSIZE);
 	else if (lun->l_blocksize < 0) {
 		log_warnx("invalid blocksize for lun \"%s\"; "
 		    "must be larger than 0", lun->l_name);
 		return (1);
 	}
 
 	if (lun->l_size != 0 && lun->l_size % lun->l_blocksize != 0) {
 		log_warnx("invalid size for lun \"%s\"; "
 		    "must be multiple of blocksize", lun->l_name);
 		return (1);
 	}
 
 	/* Sharing a path is suspicious, but not an error. */
 	TAILQ_FOREACH(other, &lun->l_conf->conf_luns, l_next) {
 		if (other == lun)
 			continue;
 		if (lun->l_path == NULL || other->l_path == NULL)
 			continue;
 		if (strcmp(lun->l_path, other->l_path) != 0)
 			continue;
 		log_debugx("WARNING: path \"%s\" duplicated "
 		    "between lun \"%s\", and "
 		    "lun \"%s\"", lun->l_path,
 		    lun->l_name, other->l_name);
 	}
 
 	return (0);
 }
 
 /*
  * Validate a parsed configuration and fill in defaults.
  *
  * Sets the default pidfile path when none was given, verifies every
  * LUN, gives every target a default auth-group and a port in the
  * "default" portal-group when it has none, gives every portal-group a
  * default discovery auth-group and filter, and marks portal-groups
  * without ports as unassigned.  Unused portal-groups and auth-groups
  * only produce warnings.
  *
  * Returns 0 on success, or the non-zero result of conf_verify_lun() for
  * the first LUN that fails verification.
  */
 int
 conf_verify(struct conf *conf)
 {
 	struct auth_group *ag;
 	struct portal_group *pg;
 	struct port *port;
 	struct target *targ;
 	struct lun *lun;
 	bool found;
 	int error, i;
 
 	if (conf->conf_pidfile_path == NULL)
 		conf->conf_pidfile_path = checked_strdup(DEFAULT_PIDFILE);
 
 	TAILQ_FOREACH(lun, &conf->conf_luns, l_next) {
 		error = conf_verify_lun(lun);
 		if (error != 0)
 			return (error);
 	}
 	TAILQ_FOREACH(targ, &conf->conf_targets, t_next) {
 		/* Targets without an explicit auth-group use "default". */
 		if (targ->t_auth_group == NULL) {
 			targ->t_auth_group = auth_group_find(conf,
 			    "default");
 			assert(targ->t_auth_group != NULL);
 		}
 		/* Targets without any port get one in "default". */
 		if (TAILQ_EMPTY(&targ->t_ports)) {
 			pg = portal_group_find(conf, "default");
 			assert(pg != NULL);
 			port_new(conf, targ, pg);
 		}
 		/* Does the target have at least one LUN slot filled? */
 		found = false;
 		for (i = 0; i < MAX_LUNS; i++) {
 			if (targ->t_luns[i] != NULL)
 				found = true;
 		}
 		if (!found && targ->t_redirection == NULL) {
 			log_warnx("no LUNs defined for target \"%s\"",
 			    targ->t_name);
 		}
 		if (found && targ->t_redirection != NULL) {
 			log_debugx("target \"%s\" contains luns, "
 			    " but configured for redirection",
 			    targ->t_name);
 		}
 	}
 	TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) {
 		assert(pg->pg_name != NULL);
 		if (pg->pg_discovery_auth_group == NULL) {
 			pg->pg_discovery_auth_group =
 			    auth_group_find(conf, "default");
 			assert(pg->pg_discovery_auth_group != NULL);
 		}
 
 		if (pg->pg_discovery_filter == PG_FILTER_UNKNOWN)
 			pg->pg_discovery_filter = PG_FILTER_NONE;
 
 		/*
 		 * A portal-group is "assigned" when at least one port
 		 * refers to it; conf_apply() does not listen on the
 		 * unassigned ones.
 		 */
 		if (!TAILQ_EMPTY(&pg->pg_ports)) {
 			if (pg->pg_redirection != NULL) {
 				log_debugx("portal-group \"%s\" assigned "
 				    "to target, but configured "
 				    "for redirection",
 				    pg->pg_name);
 			}
 			pg->pg_unassigned = false;
 		} else {
 			if (strcmp(pg->pg_name, "default") != 0)
 				log_warnx("portal-group \"%s\" not assigned "
 				    "to any target", pg->pg_name);
 			pg->pg_unassigned = true;
 		}
 	}
 	TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) {
 		/* An auth-group has either a name or a target, never both. */
 		if (ag->ag_name == NULL)
 			assert(ag->ag_target != NULL);
 		else
 			assert(ag->ag_target == NULL);
 
 		/* Is the group referenced by a target, port or portal-group? */
 		found = false;
 		TAILQ_FOREACH(targ, &conf->conf_targets, t_next) {
 			if (targ->t_auth_group == ag) {
 				found = true;
 				break;
 			}
 		}
 		TAILQ_FOREACH(port, &conf->conf_ports, p_next) {
 			if (port->p_auth_group == ag) {
 				found = true;
 				break;
 			}
 		}
 		TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) {
 			if (pg->pg_discovery_auth_group == ag) {
 				found = true;
 				break;
 			}
 		}
 		/* The three built-in groups are allowed to be unused. */
 		if (!found && ag->ag_name != NULL &&
 		    strcmp(ag->ag_name, "default") != 0 &&
 		    strcmp(ag->ag_name, "no-authentication") != 0 &&
 		    strcmp(ag->ag_name, "no-access") != 0) {
 			log_warnx("auth-group \"%s\" not assigned "
 			    "to any target", ag->ag_name);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Move the running system from "oldconf" to "newconf".
  *
  * In order: adjust the debug level, hand over or reopen the pidfile,
  * assign portal-group tags, deregister from iSNS servers that are gone,
  * remove ports and LUNs that disappeared or changed, resize or add
  * LUNs, add or update ports, open (or inherit from oldconf) the
  * listening sockets, close sockets that are no longer used, and
  * finally (re-)register on the iSNS servers of the new configuration.
  *
  * Returns the number of failures counted along the way (LUN removal,
  * resize and addition, and listening socket setup); 0 means everything
  * was applied.  Port add/update/remove failures are logged but
  * deliberately not counted, see the XXX comments below.
  */
 static int
 conf_apply(struct conf *oldconf, struct conf *newconf)
 {
 	struct lun *oldlun, *newlun, *tmplun;
 	struct portal_group *oldpg, *newpg;
 	struct portal *oldp, *newp;
 	struct port *oldport, *newport, *tmpport;
 	struct isns *oldns, *newns;
 	pid_t otherpid;
 	int changed, cumulated_error = 0, error, sockbuf;
 	int one = 1;
 
 	if (oldconf->conf_debug != newconf->conf_debug) {
 		log_debugx("changing debug level to %d", newconf->conf_debug);
 		log_init(newconf->conf_debug);
 	}
 
 	/*
 	 * Keep the already open pidfile when its path is unchanged,
 	 * otherwise remove it; a new one is opened below if needed.
 	 */
 	if (oldconf->conf_pidfh != NULL) {
 		assert(oldconf->conf_pidfile_path != NULL);
 		if (newconf->conf_pidfile_path != NULL &&
 		    strcmp(oldconf->conf_pidfile_path,
 		    newconf->conf_pidfile_path) == 0) {
 			newconf->conf_pidfh = oldconf->conf_pidfh;
 			oldconf->conf_pidfh = NULL;
 		} else {
 			log_debugx("removing pidfile %s",
 			    oldconf->conf_pidfile_path);
 			pidfile_remove(oldconf->conf_pidfh);
 			oldconf->conf_pidfh = NULL;
 		}
 	}
 
 	if (newconf->conf_pidfh == NULL && newconf->conf_pidfile_path != NULL) {
 		log_debugx("opening pidfile %s", newconf->conf_pidfile_path);
 		newconf->conf_pidfh =
 		    pidfile_open(newconf->conf_pidfile_path, 0600, &otherpid);
 		if (newconf->conf_pidfh == NULL) {
 			if (errno == EEXIST)
 				log_errx(1, "daemon already running, pid: %jd.",
 				    (intmax_t)otherpid);
 			log_err(1, "cannot open or create pidfile \"%s\"",
 			    newconf->conf_pidfile_path);
 		}
 	}
 
 	/*
 	 * Go through the new portal groups, assigning tags or preserving old.
 	 */
 	TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) {
 		oldpg = portal_group_find(oldconf, newpg->pg_name);
 		if (oldpg != NULL)
 			newpg->pg_tag = oldpg->pg_tag;
 		else
 			newpg->pg_tag = ++last_portal_group_tag;
 	}
 
 	/* Deregister on removed iSNS servers. */
 	TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) {
 		TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) {
 			if (strcmp(oldns->i_addr, newns->i_addr) == 0)
 				break;
 		}
 		/* newns is NULL when the inner loop found no match. */
 		if (newns == NULL)
 			isns_deregister(oldns);
 	}
 
 	/*
 	 * XXX: If target or lun removal fails, we should somehow "move"
 	 *      the old lun or target into newconf, so that subsequent
 	 *      conf_apply() would try to remove them again.  That would
 	 *      be somewhat hairy, though, and lun deletion failures don't
 	 *      really happen, so leave it as it is for now.
 	 */
 	/*
 	 * First, remove any ports present in the old configuration
 	 * and missing in the new one.
 	 */
 	TAILQ_FOREACH_SAFE(oldport, &oldconf->conf_ports, p_next, tmpport) {
 		newport = port_find(newconf, oldport->p_name);
 		if (newport != NULL)
 			continue;
 		log_debugx("removing port \"%s\"", oldport->p_name);
 		error = kernel_port_remove(oldport);
 		if (error != 0) {
 			log_warnx("failed to remove port %s",
 			    oldport->p_name);
 			/*
 			 * XXX: Uncomment after fixing the root cause.
 			 *
 			 * cumulated_error++;
 			 */
 		}
 	}
 
 	/*
 	 * Second, remove any LUNs present in the old configuration
 	 * and missing in the new one.
 	 */
 	TAILQ_FOREACH_SAFE(oldlun, &oldconf->conf_luns, l_next, tmplun) {
 		newlun = lun_find(newconf, oldlun->l_name);
 		if (newlun == NULL) {
 			log_debugx("lun \"%s\", CTL lun %d "
 			    "not found in new configuration; "
 			    "removing", oldlun->l_name, oldlun->l_ctl_lun);
 			error = kernel_lun_remove(oldlun);
 			if (error != 0) {
 				log_warnx("failed to remove lun \"%s\", "
 				    "CTL lun %d",
 				    oldlun->l_name, oldlun->l_ctl_lun);
 				cumulated_error++;
 			}
 			continue;
 		}
 
 		/*
 		 * Also remove the LUNs changed by more than size.
 		 */
 		changed = 0;
 		assert(oldlun->l_backend != NULL);
 		assert(newlun->l_backend != NULL);
 		if (strcmp(newlun->l_backend, oldlun->l_backend) != 0) {
 			log_debugx("backend for lun \"%s\", "
 			    "CTL lun %d changed; removing",
 			    oldlun->l_name, oldlun->l_ctl_lun);
 			changed = 1;
 		}
 		if (oldlun->l_blocksize != newlun->l_blocksize) {
 			log_debugx("blocksize for lun \"%s\", "
 			    "CTL lun %d changed; removing",
 			    oldlun->l_name, oldlun->l_ctl_lun);
 			changed = 1;
 		}
 		if (newlun->l_device_id != NULL &&
 		    (oldlun->l_device_id == NULL ||
 		     strcmp(oldlun->l_device_id, newlun->l_device_id) !=
 		     0)) {
 			log_debugx("device-id for lun \"%s\", "
 			    "CTL lun %d changed; removing",
 			    oldlun->l_name, oldlun->l_ctl_lun);
 			changed = 1;
 		}
 		if (newlun->l_path != NULL &&
 		    (oldlun->l_path == NULL ||
 		     strcmp(oldlun->l_path, newlun->l_path) != 0)) {
 			log_debugx("path for lun \"%s\", "
 			    "CTL lun %d, changed; removing",
 			    oldlun->l_name, oldlun->l_ctl_lun);
 			changed = 1;
 		}
 		if (newlun->l_serial != NULL &&
 		    (oldlun->l_serial == NULL ||
 		     strcmp(oldlun->l_serial, newlun->l_serial) != 0)) {
 			log_debugx("serial for lun \"%s\", "
 			    "CTL lun %d changed; removing",
 			    oldlun->l_name, oldlun->l_ctl_lun);
 			changed = 1;
 		}
 		if (changed) {
 			error = kernel_lun_remove(oldlun);
 			if (error != 0) {
 				log_warnx("failed to remove lun \"%s\", "
 				    "CTL lun %d",
 				    oldlun->l_name, oldlun->l_ctl_lun);
 				cumulated_error++;
 			}
 			/*
 			 * Drop it from oldconf, so that the loop below
 			 * does not find it and adds the new LUN afresh.
 			 */
 			lun_delete(oldlun);
 			continue;
 		}
 
 		/* Unchanged LUN: carry its CTL LUN number over. */
 		lun_set_ctl_lun(newlun, oldlun->l_ctl_lun);
 	}
 
 	/*
 	 * Third, resize the LUNs that survived and add the new ones.
 	 */
 	TAILQ_FOREACH_SAFE(newlun, &newconf->conf_luns, l_next, tmplun) {
 		oldlun = lun_find(oldconf, newlun->l_name);
 		if (oldlun != NULL) {
 			if (newlun->l_size != oldlun->l_size ||
 			    newlun->l_size == 0) {
 				log_debugx("resizing lun \"%s\", CTL lun %d",
 				    newlun->l_name, newlun->l_ctl_lun);
 				error = kernel_lun_resize(newlun);
 				if (error != 0) {
 					log_warnx("failed to "
 					    "resize lun \"%s\", CTL lun %d",
 					    newlun->l_name,
 					    newlun->l_ctl_lun);
 					cumulated_error++;
 				}
 			}
 			continue;
 		}
 		log_debugx("adding lun \"%s\"", newlun->l_name);
 		error = kernel_lun_add(newlun);
 		if (error != 0) {
 			log_warnx("failed to add lun \"%s\"", newlun->l_name);
 			lun_delete(newlun);
 			cumulated_error++;
 		}
 	}
 
 	/*
 	 * Now add new ports or modify existing ones.
 	 */
 	TAILQ_FOREACH(newport, &newconf->conf_ports, p_next) {
 		oldport = port_find(oldconf, newport->p_name);
 
 		if (oldport == NULL) {
 			log_debugx("adding port \"%s\"", newport->p_name);
 			error = kernel_port_add(newport);
 		} else {
 			log_debugx("updating port \"%s\"", newport->p_name);
 			newport->p_ctl_port = oldport->p_ctl_port;
 			error = kernel_port_update(newport);
 		}
 		if (error != 0) {
 			log_warnx("failed to %s port %s",
 			    (oldport == NULL) ? "add" : "update",
 			    newport->p_name);
 			/*
 			 * XXX: Uncomment after fixing the root cause.
 			 *
 			 * cumulated_error++;
 			 */
 		}
 	}
 
 	/*
 	 * Go through the new portals, opening the sockets as necessary.
 	 */
 	TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) {
 		if (newpg->pg_unassigned) {
 			log_debugx("not listening on portal-group \"%s\", "
 			    "not assigned to any target",
 			    newpg->pg_name);
 			continue;
 		}
 		TAILQ_FOREACH(newp, &newpg->pg_portals, p_next) {
 			/*
 			 * Try to find already open portal and reuse
 			 * the listening socket.  We don't care about
 			 * what portal or portal group that was, what
 			 * matters is the listening address.
 			 */
 			TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups,
 			    pg_next) {
 				TAILQ_FOREACH(oldp, &oldpg->pg_portals,
 				    p_next) {
 					if (strcmp(newp->p_listen,
 					    oldp->p_listen) == 0 &&
 					    oldp->p_socket > 0) {
 						/*
 						 * Take the socket over and
 						 * zero the old reference so
 						 * it is not closed below.
 						 */
 						newp->p_socket =
 						    oldp->p_socket;
 						oldp->p_socket = 0;
 						break;
 					}
 				}
 			}
 			if (newp->p_socket > 0) {
 				/*
 				 * We're done with this portal.
 				 */
 				continue;
 			}
 
 #ifdef ICL_KERNEL_PROXY
 			if (proxy_mode) {
 				newpg->pg_conf->conf_portal_id++;
 				newp->p_id = newpg->pg_conf->conf_portal_id;
 				log_debugx("listening on %s, portal-group "
 				    "\"%s\", portal id %d, using ICL proxy",
 				    newp->p_listen, newpg->pg_name, newp->p_id);
 				kernel_listen(newp->p_ai, newp->p_iser,
 				    newp->p_id);
 				continue;
 			}
 #endif
 			assert(proxy_mode == false);
 			assert(newp->p_iser == false);
 
 			log_debugx("listening on %s, portal-group \"%s\"",
 			    newp->p_listen, newpg->pg_name);
 			newp->p_socket = socket(newp->p_ai->ai_family,
 			    newp->p_ai->ai_socktype,
 			    newp->p_ai->ai_protocol);
 			if (newp->p_socket < 0) {
 				log_warn("socket(2) failed for %s",
 				    newp->p_listen);
 				cumulated_error++;
 				continue;
 			}
 			/* Buffer size failures are logged but not fatal. */
 			sockbuf = SOCKBUF_SIZE;
 			if (setsockopt(newp->p_socket, SOL_SOCKET, SO_RCVBUF,
 			    &sockbuf, sizeof(sockbuf)) == -1)
 				log_warn("setsockopt(SO_RCVBUF) failed "
 				    "for %s", newp->p_listen);
 			sockbuf = SOCKBUF_SIZE;
 			if (setsockopt(newp->p_socket, SOL_SOCKET, SO_SNDBUF,
 			    &sockbuf, sizeof(sockbuf)) == -1)
 				log_warn("setsockopt(SO_SNDBUF) failed "
 				    "for %s", newp->p_listen);
 			/*
 			 * From here on a failure closes the socket and
 			 * marks the portal as not listening (p_socket 0).
 			 */
 			error = setsockopt(newp->p_socket, SOL_SOCKET,
 			    SO_REUSEADDR, &one, sizeof(one));
 			if (error != 0) {
 				log_warn("setsockopt(SO_REUSEADDR) failed "
 				    "for %s", newp->p_listen);
 				close(newp->p_socket);
 				newp->p_socket = 0;
 				cumulated_error++;
 				continue;
 			}
 			error = bind(newp->p_socket, newp->p_ai->ai_addr,
 			    newp->p_ai->ai_addrlen);
 			if (error != 0) {
 				log_warn("bind(2) failed for %s",
 				    newp->p_listen);
 				close(newp->p_socket);
 				newp->p_socket = 0;
 				cumulated_error++;
 				continue;
 			}
 			error = listen(newp->p_socket, -1);
 			if (error != 0) {
 				log_warn("listen(2) failed for %s",
 				    newp->p_listen);
 				close(newp->p_socket);
 				newp->p_socket = 0;
 				cumulated_error++;
 				continue;
 			}
 		}
 	}
 
 	/*
 	 * Go through the no longer used sockets, closing them.
 	 */
 	TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups, pg_next) {
 		TAILQ_FOREACH(oldp, &oldpg->pg_portals, p_next) {
 			if (oldp->p_socket <= 0)
 				continue;
 			log_debugx("closing socket for %s, portal-group \"%s\"",
 			    oldp->p_listen, oldpg->pg_name);
 			close(oldp->p_socket);
 			oldp->p_socket = 0;
 		}
 	}
 
 	/* (Re-)Register on remaining/new iSNS servers. */
 	TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) {
 		TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) {
 			if (strcmp(oldns->i_addr, newns->i_addr) == 0)
 				break;
 		}
 		/* oldns is NULL here for a server that is new. */
 		isns_register(newns, oldns);
 	}
 
 	/* Schedule iSNS update */
 	if (!TAILQ_EMPTY(&newconf->conf_isns))
 		set_timeout((newconf->conf_isns_period + 2) / 3, false);
 
 	return (cumulated_error);
 }
 
 /*
  * Report whether SIGALRM has been delivered since the timeout was last
  * armed or disarmed with set_timeout().
  */
 bool
 timed_out(void)
 {
 
 	return (sigalrm_received);
 }
 
 /*
  * SIGALRM handler used for fatal timeouts.
  *
  * Logging and exiting right here would be the simplest thing to do, but
  * log_errx() calls syslog(3) and so is not signal safe.  The first
  * alarm therefore only raises a flag that pdu_send() and pdu_receive()
  * check, so that they can call log_errx() themselves.  If the flag is
  * still set when the next alarm arrives a second later, nobody noticed,
  * and the process exits from the handler.
  */
 static void
 sigalrm_handler_fatal(int dummy __unused)
 {
 
 	if (!sigalrm_received) {
 		sigalrm_received = true;
 		return;
 	}
 
 	/*
 	 * Oh well.  Just give up and quit.
 	 */
 	_exit(2);
 }
 
 /*
  * SIGALRM handler used for non-fatal timeouts: only records that the
  * alarm went off, for timed_out() to report.
  */
 static void
 sigalrm_handler(int dummy __unused)
 {
 
 	sigalrm_received = true;
 }
 
 void
 set_timeout(int timeout, int fatal)
 {
 	struct sigaction sa;
 	struct itimerval itv;
 	int error;
 
 	if (timeout <= 0) {
 		log_debugx("session timeout disabled");
 		bzero(&itv, sizeof(itv));
 		error = setitimer(ITIMER_REAL, &itv, NULL);
 		if (error != 0)
 			log_err(1, "setitimer");
 		sigalrm_received = false;
 		return;
 	}
 
 	sigalrm_received = false;
 	bzero(&sa, sizeof(sa));
 	if (fatal)
 		sa.sa_handler = sigalrm_handler_fatal;
 	else
 		sa.sa_handler = sigalrm_handler;
 	sigfillset(&sa.sa_mask);
 	error = sigaction(SIGALRM, &sa, NULL);
 	if (error != 0)
 		log_err(1, "sigaction");
 
 	/*
 	 * First SIGALRM will arive after conf_timeout seconds.
 	 * If we do nothing, another one will arrive a second later.
 	 */
 	log_debugx("setting session timeout to %d seconds", timeout);
 	bzero(&itv, sizeof(itv));
 	itv.it_interval.tv_sec = 1;
 	itv.it_value.tv_sec = timeout;
 	error = setitimer(ITIMER_REAL, &itv, NULL);
 	if (error != 0)
 		log_err(1, "setitimer");
 }
 
 /*
  * Reap terminated child processes and log how each one ended.
  *
  * When "block" is true the first wait blocks until a child exits; all
  * further waits, and every wait when "block" is false, are
  * non-blocking.  Returns the number of children reaped.
  */
 static int
 wait_for_children(bool block)
 {
 	pid_t child;
 	int options, reaped, status;
 
 	reaped = 0;
 	for (;;) {
 		/*
 		 * If "block" is true, wait for at least one process.
 		 */
 		options = (block && reaped == 0) ? 0 : WNOHANG;
 		child = wait4(-1, &status, options, NULL);
 		if (child <= 0)
 			break;
 
 		if (WIFSIGNALED(status))
 			log_warnx("child process %d terminated with signal %d",
 			    child, WTERMSIG(status));
 		else if (WEXITSTATUS(status) != 0)
 			log_warnx("child process %d terminated with exit status %d",
 			    child, WEXITSTATUS(status));
 		else
 			log_debugx("child process %d terminated gracefully", child);
 
 		reaped++;
 	}
 
 	return (reaped);
 }
 
 /*
  * Service one freshly accepted connection.
  *
  * Unless "dont_fork" is set, already-finished children are reaped, the
  * conf_maxproc limit (when positive) is enforced by blocking until a child
  * exits, and a child process is forked; the parent then closes "fd" and
  * returns.  Everything after that point runs in the child -- or in the
  * calling process itself when "dont_fork" is set: the pidfile handle is
  * closed, the peer address is converted to a numeric host string, the
  * login phase is run and the session is either handed off to the kernel
  * or served as a discovery session.  That path ends in exit(0).
  */
 static void
 handle_connection(struct portal *portal, int fd,
     const struct sockaddr *client_sa, bool dont_fork)
 {
 	struct conf *cfg = portal->p_portal_group->pg_conf;
 	struct connection *session;
 	char peer[NI_MAXHOST + 1];
 	pid_t child;
 	int gai_error;
 
 	if (!dont_fork) {
 		/* Collect whatever has exited since the last connection. */
 		nchildren -= wait_for_children(false);
 		assert(nchildren >= 0);
 
 		/* Throttle: block until we are back under the limit. */
 		while (cfg->conf_maxproc > 0 &&
 		    nchildren >= cfg->conf_maxproc) {
 			log_debugx("maxproc limit of %d child processes hit; "
 			    "waiting for child process to exit",
 			    cfg->conf_maxproc);
 			nchildren -= wait_for_children(true);
 			assert(nchildren >= 0);
 		}
 
 		log_debugx("incoming connection; forking child process #%d",
 		    nchildren);
 		nchildren++;
 
 		child = fork();
 		if (child < 0)
 			log_err(1, "fork");
 		if (child > 0) {
 			/* Parent: the child owns the socket from here on. */
 			close(fd);
 			return;
 		}
 	} else {
 		log_debugx("incoming connection; not forking due to -d flag");
 	}
 
 	pidfile_close(cfg->conf_pidfh);
 
 	gai_error = getnameinfo(client_sa, client_sa->sa_len,
 	    peer, sizeof(peer), NULL, 0, NI_NUMERICHOST);
 	if (gai_error != 0)
 		log_errx(1, "getnameinfo: %s", gai_strerror(gai_error));
 
 	log_debugx("accepted connection from %s; portal group \"%s\"",
 	    peer, portal->p_portal_group->pg_name);
 	log_set_peer_addr(peer);
 	setproctitle("%s", peer);
 
 	session = connection_new(portal, fd, peer, client_sa);
 	set_timeout(cfg->conf_timeout, true);
 	kernel_capsicate();
 	login(session);
 
 	if (session->conn_session_type != CONN_SESSION_TYPE_NORMAL) {
 		assert(session->conn_session_type ==
 		    CONN_SESSION_TYPE_DISCOVERY);
 		discovery(session);
 	} else {
 		kernel_handoff(session);
 		log_debugx("connection handed off to the kernel");
 	}
 
 	log_debugx("nothing more to do; exiting");
 	exit(0);
 }
 
 /*
  * Add "fd" to "fdset" and return the updated highest-descriptor value.
  *
  * Descriptors <= 0 are treated as sockets that failed to bind: they are
  * not added and "nfds" is returned unchanged.  Otherwise the result is
  * the larger of "fd" and "nfds".
  */
 static int
 fd_add(int fd, fd_set *fdset, int nfds)
 {
 
 	if (fd > 0) {
 		FD_SET(fd, fdset);
 		nfds = (fd > nfds) ? fd : nfds;
 	}
 
 	return (nfds);
 }
 
 /*
  * Accept incoming connections and pass each one to handle_connection().
  *
  * The pidfile is written once on entry.  The loop then runs until one of
  * the conditions the caller has to deal with shows up: sighup_received,
  * sigterm_received or timed_out() becoming true, or select(2) being
  * interrupted (EINTR).  In all of those cases the function simply returns.
  *
  * conf      - configuration whose portal groups and portals are served.
  * dont_fork - passed through to handle_connection().
  */
 static void
 main_loop(struct conf *conf, bool dont_fork)
 {
 	struct portal_group *pg;
 	struct portal *portal;
 	struct sockaddr_storage client_sa;
 	socklen_t client_salen;
 #ifdef ICL_KERNEL_PROXY
 	int connection_id;
 	int portal_id;
 #endif
 	fd_set fdset;
 	int error, nfds, client_fd;
 
 	pidfile_write(conf->conf_pidfh);
 
 	for (;;) {
 		if (sighup_received || sigterm_received || timed_out())
 			return;
 
 #ifdef ICL_KERNEL_PROXY
 		if (proxy_mode) {
 			/*
 			 * Proxy mode: the connection comes from kernel_accept()
 			 * as a (connection id, portal id) pair rather than from
 			 * a listening socket.
 			 */
 			client_salen = sizeof(client_sa);
 			kernel_accept(&connection_id, &portal_id,
 			    (struct sockaddr *)&client_sa, &client_salen);
 			assert(client_salen >= client_sa.ss_len);
 
 			log_debugx("incoming connection, id %d, portal id %d",
 			    connection_id, portal_id);
 			/* Map the portal id back to its struct portal. */
 			TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) {
 				TAILQ_FOREACH(portal, &pg->pg_portals, p_next) {
 					if (portal->p_id == portal_id) {
 						goto found;
 					}
 				}
 			}
 
 			/*
 			 * No portal matched.  NOTE(review): this relies on
 			 * log_errx(1, ...) not returning; otherwise "portal"
 			 * would be NULL at the label below -- confirm.
 			 */
 			log_errx(1, "kernel returned invalid portal_id %d",
 			    portal_id);
 
 found:
 			handle_connection(portal, connection_id,
 			    (struct sockaddr *)&client_sa, dont_fork);
 		} else {
 #endif
 			assert(proxy_mode == false);
 
 			/*
 			 * Rebuild the descriptor set from every portal socket
 			 * on each iteration; fd_add() skips sockets <= 0.
 			 */
 			FD_ZERO(&fdset);
 			nfds = 0;
 			TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) {
 				TAILQ_FOREACH(portal, &pg->pg_portals, p_next)
 					nfds = fd_add(portal->p_socket, &fdset, nfds);
 			}
 			error = select(nfds + 1, &fdset, NULL, NULL, NULL);
 			if (error <= 0) {
 				/*
 				 * Interrupted by a signal: return so the caller
 				 * can look at the signal flags.
 				 */
 				if (errno == EINTR)
 					return;
 				log_err(1, "select");
 			}
 			TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) {
 				TAILQ_FOREACH(portal, &pg->pg_portals, p_next) {
 					if (!FD_ISSET(portal->p_socket, &fdset))
 						continue;
 					client_salen = sizeof(client_sa);
 					client_fd = accept(portal->p_socket,
 					    (struct sockaddr *)&client_sa,
 					    &client_salen);
-					if (client_fd < 0)
+					if (client_fd < 0) {
+						if (errno == ECONNABORTED)
+							continue;
 						log_err(1, "accept");
+					}
 					assert(client_salen >= client_sa.ss_len);
 
 					handle_connection(portal, client_fd,
 					    (struct sockaddr *)&client_sa,
 					    dont_fork);
 					/*
 					 * Leaves only the inner (per-portal)
 					 * loop; the remaining portal groups
 					 * are still scanned.
 					 */
 					break;
 				}
 			}
 #ifdef ICL_KERNEL_PROXY
 		}
 #endif
 	}
 }
 
 /*
  * SIGHUP handler: sets sighup_received, which main_loop() polls to
  * return to main(), where the configuration is reloaded.
  */
 static void
 sighup_handler(int dummy __unused)
 {
 
 	sighup_received = true;
 }
 
 /*
  * Handler registered for both SIGTERM and SIGINT: sets sigterm_received,
  * which main_loop() polls to return to main() for shutdown.
  */
 static void
 sigterm_handler(int dummy __unused)
 {
 
 	sigterm_received = true;
 }
 
 /*
  * SIGCHLD handler; intentionally empty.
  */
 static void
 sigchld_handler(int dummy __unused)
 {
 
 	/*
 	 * The only purpose of this handler is to make SIGCHLD
 	 * interrupt the ISCSIDWAIT ioctl(2), so we can call
 	 * wait_for_children().
 	 *
 	 * NOTE(review): no ISCSIDWAIT ioctl is visible in this part of
 	 * the file; the blocking call here appears to be select(2) in
 	 * main_loop(), which returns on EINTR so that main() can call
 	 * wait_for_children() -- confirm and update the wording.
 	 */
 }
 
 static void
 register_signals(void)
 {
 	struct sigaction sa;
 	int error;
 
 	bzero(&sa, sizeof(sa));
 	sa.sa_handler = sighup_handler;
 	sigfillset(&sa.sa_mask);
 	error = sigaction(SIGHUP, &sa, NULL);
 	if (error != 0)
 		log_err(1, "sigaction");
 
 	sa.sa_handler = sigterm_handler;
 	error = sigaction(SIGTERM, &sa, NULL);
 	if (error != 0)
 		log_err(1, "sigaction");
 
 	sa.sa_handler = sigterm_handler;
 	error = sigaction(SIGINT, &sa, NULL);
 	if (error != 0)
 		log_err(1, "sigaction");
 
 	sa.sa_handler = sigchld_handler;
 	error = sigaction(SIGCHLD, &sa, NULL);
 	if (error != 0)
 		log_err(1, "sigaction");
 }
 
 /*
  * Daemon entry point.
  *
  * Parses the command line, builds one configuration from the kernel state
  * and one from the configuration file, applies the latter, registers the
  * signal handlers and (unless -d was given) daemonizes.  It then calls
  * main_loop() forever, reacting each time it returns:
  *   - SIGHUP:         reload the configuration file and apply it;
  *   - SIGTERM/SIGINT: apply an empty configuration and exit(0);
  *   - anything else:  reap children and, if the timer expired, run the
  *                     iSNS checks and re-arm the timer.
  *
  * Options:
  *   -d       do not daemonize; each occurrence also raises the debug level
  *   -f path  configuration file to use instead of DEFAULT_CONFIG_PATH
  *   -R       enable proxy mode (fatal unless built with ICL_KERNEL_PROXY)
  */
 int
 main(int argc, char **argv)
 {
 	struct conf *oldconf, *newconf, *tmpconf;
 	struct isns *newns;
 	const char *config_path = DEFAULT_CONFIG_PATH;
 	int debug = 0, ch, error;
 	bool dont_daemonize = false;
 
 	while ((ch = getopt(argc, argv, "df:R")) != -1) {
 		switch (ch) {
 		case 'd':
 			dont_daemonize = true;
 			debug++;
 			break;
 		case 'f':
 			config_path = optarg;
 			break;
 		case 'R':
 #ifndef ICL_KERNEL_PROXY
 			log_errx(1, "ctld(8) compiled without ICL_KERNEL_PROXY "
 			    "does not support iSER protocol");
 #endif
 			proxy_mode = true;
 			break;
 		case '?':
 		default:
 			usage();
 		}
 	}
 	/* No positional arguments are accepted. */
 	argc -= optind;
 	if (argc != 0)
 		usage();
 
 	log_init(debug);
 	kernel_init();
 
 	/*
 	 * "oldconf" is built from the kernel, "newconf" from the
 	 * configuration file; conf_apply() is then given both.
 	 */
 	oldconf = conf_new_from_kernel();
 	newconf = conf_new_from_file(config_path, oldconf);
 	if (newconf == NULL)
 		log_errx(1, "configuration error; exiting");
 	if (debug > 0) {
 		oldconf->conf_debug = debug;
 		newconf->conf_debug = debug;
 	}
 
 	error = conf_apply(oldconf, newconf);
 	if (error != 0)
 		log_errx(1, "failed to apply configuration; exiting");
 
 	conf_delete(oldconf);
 	oldconf = NULL;
 
 	register_signals();
 
 	if (dont_daemonize == false) {
 		log_debugx("daemonizing");
 		if (daemon(0, 0) == -1) {
 			log_warn("cannot daemonize");
 			pidfile_remove(newconf->conf_pidfh);
 			exit(1);
 		}
 	}
 
 	/*
 	 * Schedule iSNS update: non-fatal timer set to one third of
 	 * conf_isns_period, rounded up.
 	 */
 	if (!TAILQ_EMPTY(&newconf->conf_isns))
 		set_timeout((newconf->conf_isns_period + 2) / 3, false);
 
 	for (;;) {
 		/* dont_daemonize doubles as main_loop()'s "dont_fork". */
 		main_loop(newconf, dont_daemonize);
 		if (sighup_received) {
 			sighup_received = false;
 			log_debugx("received SIGHUP, reloading configuration");
 			tmpconf = conf_new_from_file(config_path, newconf);
 			if (tmpconf == NULL) {
 				log_warnx("configuration error, "
 				    "continuing with old configuration");
 			} else {
 				if (debug > 0)
 					tmpconf->conf_debug = debug;
 				/*
 				 * The freshly parsed configuration becomes
 				 * current even if conf_apply() reports an
 				 * error; the previous one is deleted either
 				 * way.
 				 */
 				oldconf = newconf;
 				newconf = tmpconf;
 				error = conf_apply(oldconf, newconf);
 				if (error != 0)
 					log_warnx("failed to reload "
 					    "configuration");
 				conf_delete(oldconf);
 				oldconf = NULL;
 			}
 		} else if (sigterm_received) {
 			log_debugx("exiting on signal; "
 			    "reloading empty configuration");
 
 			log_debugx("removing CTL iSCSI ports "
 			    "and terminating all connections");
 
 			/* Shut down by applying a configuration from conf_new(). */
 			oldconf = newconf;
 			newconf = conf_new();
 			if (debug > 0)
 				newconf->conf_debug = debug;
 			error = conf_apply(oldconf, newconf);
 			if (error != 0)
 				log_warnx("failed to apply configuration");
 			conf_delete(oldconf);
 			oldconf = NULL;
 
 			log_warnx("exiting on signal");
 			exit(0);
 		} else {
 			/*
 			 * main_loop() returned without SIGHUP/SIGTERM pending:
 			 * either select(2) was interrupted or the timer
 			 * expired.  Reap children in both cases.
 			 */
 			nchildren -= wait_for_children(false);
 			assert(nchildren >= 0);
 			if (timed_out()) {
 				/* Disarm the timer while the checks run. */
 				set_timeout(0, false);
 				TAILQ_FOREACH(newns, &newconf->conf_isns, i_next)
 					isns_check(newns);
 				/* Schedule iSNS update */
 				if (!TAILQ_EMPTY(&newconf->conf_isns)) {
 					set_timeout((newconf->conf_isns_period
 					    + 2) / 3,
 					    false);
 				}
 			}
 		}
 	}
 	/* NOTREACHED */
 }
Index: projects/ifnet
===================================================================
--- projects/ifnet	(revision 281172)
+++ projects/ifnet	(revision 281173)

Property changes on: projects/ifnet
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r281153-281172