Index: projects/clang900-import/Makefile.inc1 =================================================================== --- projects/clang900-import/Makefile.inc1 (revision 352536) +++ projects/clang900-import/Makefile.inc1 (revision 352537) @@ -1,3399 +1,3400 @@ # # $FreeBSD$ # # Make command line options: # -DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir # -DNO_CLEAN do not clean at all # -DDB_FROM_SRC use the user/group databases in src/etc instead of # the system database when installing. # -DNO_SHARE do not go into share subdir # -DKERNFAST define NO_KERNEL{CONFIG,CLEAN,OBJ} # -DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel # -DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel # -DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel # -DNO_PORTSUPDATE do not update ports in ${MAKE} update # -DNO_ROOT install without using root privilege # -DNO_DOCUPDATE do not update doc in ${MAKE} update # -DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects # LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list # LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list # LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target # LOCAL_MTREE="list of mtree files" to process to allow local directories # to be created before files are installed # LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools # list # LOCAL_XTOOL_DIRS="list of dirs" to add additional dirs to the # cross-tools target # METALOG="path to metadata log" to write permission and ownership # when NO_ROOT is set. (default: ${DESTDIR}/METALOG) # TARGET="machine" to crossbuild world for a different machine type # TARGET_ARCH= may be required when a TARGET supports multiple endians # BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL}) # WORLD_FLAGS= additional flags to pass to make(1) during buildworld # KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel # SUBDIR_OVERRIDE="list of dirs" to build rather than everything. # All libraries and includes, and some build tools will still build. # # The intended user-driven targets are: # buildworld - rebuild *everything*, including glue to help do upgrades # installworld- install everything built by "buildworld" # checkworld - run test suite on installed world # doxygen - build API documentation of the kernel # update - convenient way to update your source tree (eg: svn/svnup) # # Standard targets (not defined here) are documented in the makefiles in # /usr/share/mk. These include: # obj depend all install clean cleandepend cleanobj .if !defined(TARGET) || !defined(TARGET_ARCH) .error "Both TARGET and TARGET_ARCH must be defined." .endif .if make(showconfig) || make(test-system-*) _MKSHOWCONFIG= t .endif SRCDIR?= ${.CURDIR} LOCALBASE?= /usr/local # Cross toolchain changes must be in effect before bsd.compiler.mk # so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes. 
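# An illustrative use of the CROSS_TOOLCHAIN knob handled below (the
# toolchain name is an example; any file installed under
# ${LOCALBASE}/share/toolchains/ by an external-toolchain port works the
# same way):
#   make CROSS_TOOLCHAIN=llvm90 TARGET=arm64 TARGET_ARCH=aarch64 buildworld
# The named .mk file is expected to set XCC/XCXX/XCPP (and usually the
# binutils variables) for the external compiler.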
.if defined(CROSS_TOOLCHAIN) .if exists(${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk) .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk" .elif exists(${CROSS_TOOLCHAIN}) .include "${CROSS_TOOLCHAIN}" .else .error CROSS_TOOLCHAIN ${CROSS_TOOLCHAIN} not found .endif CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}" .endif .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif XCOMPILERS= CC CXX CPP .for COMPILER in ${XCOMPILERS} .if defined(CROSS_COMPILER_PREFIX) X${COMPILER}?= ${CROSS_COMPILER_PREFIX}${${COMPILER}} .else X${COMPILER}?= ${${COMPILER}} .endif .endfor # If a full path to an external cross compiler is given, don't build # a cross compiler. .if ${XCC:N${CCACHE_BIN}:M/*} MK_CLANG_BOOTSTRAP= no MK_GCC_BOOTSTRAP= no .endif # Pull in compiler metadata from buildworld/toolchain if possible to avoid # running CC from bsd.compiler.mk. .if make(installworld) || make(install) || make(distributeworld) || \ make(stageworld) .-include "${OBJTOP}/toolchain-metadata.mk" .if !defined(_LOADED_TOOLCHAIN_METADATA) .error A build is required first. You may have the wrong MAKEOBJDIRPREFIX set. .endif .endif # Pull in COMPILER_TYPE and COMPILER_FREEBSD_VERSION early. Pull it from the # tree to be friendlier to foreign OS builds. It's safe to do so unconditionally # here since we will always have the right make, unlike in src/Makefile # Don't include bsd.linker.mk yet until XBINUTILS is handled (after src.opts.mk) _NO_INCLUDE_LINKERMK= t # We also want the X_COMPILER* variables if we are using an external toolchain. _WANT_TOOLCHAIN_CROSS_VARS= t .include "share/mk/bsd.compiler.mk" .undef _NO_INCLUDE_LINKERMK .undef _WANT_TOOLCHAIN_CROSS_VARS # src.opts.mk depends on COMPILER_FEATURES .include "share/mk/src.opts.mk" .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:Marmv[67]*} != "" && ${TARGET_CPUTYPE:M*soft*} == "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif MACHINE_ABI?= unknown MACHINE_TRIPLE?=${MACHINE_ARCH:S/amd64/x86_64/:C/hf$//:S/mipsn32/mips64/}-${MACHINE_ABI}-freebsd13.0 TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:S/amd64/x86_64/:C/hf$//:S/mipsn32/mips64/}-${TARGET_ABI}-freebsd13.0 KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armv6/arm \ armv7/arm \ i386 \ mips \ mipsel/mips \ mips64el/mips \ mipsn32el/mips \ mips64/mips \ mipsn32/mips \ mipshf/mips \ mipselhf/mips \ mips64elhf/mips \ mips64hf/mips \ powerpc \ powerpc64/powerpc \ powerpcspe/powerpc \ riscv64/riscv \ riscv64sf/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor # If all targets are disabled for system llvm then don't expect it to work # for cross-builds. .if !defined(TOOLS_PREFIX) && ${MK_LLVM_TARGET_ALL} == "no" && \ ${MACHINE} != ${TARGET} && ${MACHINE_ARCH} != ${TARGET_ARCH} && \ !make(showconfig) MK_SYSTEM_COMPILER= no MK_SYSTEM_LINKER= no .endif # Handle external binutils. .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows riscv64 builds, for example, to automatically use the # riscv64-binutils port or package. 
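# As a sketch (paths are illustrative and follow the TARGET_TRIPLE logic
# above), a riscv64 build with neither in-tree binutils nor an lld bootstrap
# ends up with roughly:
#   CROSS_BINUTILS_PREFIX=/usr/local/riscv64-unknown-freebsd13.0/bin/
#   XLD=/usr/local/riscv64-unknown-freebsd13.0/bin/ld
# Individual X* tool variables (XAS, XAR, XLD, ...) can still be overridden
# on the command line or in src.conf.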
.if !make(showconfig) && !defined(_NO_INCLUDE_COMPILERMK) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ ${MK_LLD_BOOTSTRAP} == "no" && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_TRIPLE}/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. .endif .endif .endif XBINUTILS= AS AR LD NM OBJCOPY RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}/${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX:C,/*$,,}/${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor # If a full path to an external linker is given, don't build lld. .if ${XLD:M/*} MK_LLD_BOOTSTRAP= no .endif # We also want the X_LINKER* variables if we are using an external toolchain. _WANT_TOOLCHAIN_CROSS_VARS= t .include "share/mk/bsd.linker.mk" .undef _WANT_TOOLCHAIN_CROSS_VARS # Begin WITH_SYSTEM_COMPILER / WITH_SYSTEM_LD # WITH_SYSTEM_COMPILER - Pull in needed values and make a decision. # Check if there is a local compiler that can satisfy as an external compiler. # Which compiler is expected to be used? .if ${MK_CLANG_BOOTSTRAP} == "yes" WANT_COMPILER_TYPE= clang .elif ${MK_GCC_BOOTSTRAP} == "yes" WANT_COMPILER_TYPE= gcc .else WANT_COMPILER_TYPE= .endif .if !defined(WANT_COMPILER_FREEBSD_VERSION) && !make(showconfig) && \ !make(test-system-linker) .if ${WANT_COMPILER_TYPE} == "clang" WANT_COMPILER_FREEBSD_VERSION_FILE= lib/clang/freebsd_cc_version.h WANT_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FREEBSD_CC_VERSION" {printf("%d\n", $$3)}' \ ${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown WANT_COMPILER_VERSION_FILE= lib/clang/include/clang/Basic/Version.inc WANT_COMPILER_VERSION!= \ awk '$$2 == "CLANG_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \ ${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown .elif ${WANT_COMPILER_TYPE} == "gcc" WANT_COMPILER_FREEBSD_VERSION_FILE= gnu/usr.bin/cc/cc_tools/freebsd-native.h WANT_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FBSD_CC_VER" {printf("%d\n", $$3)}' \ ${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown WANT_COMPILER_VERSION_FILE= contrib/gcc/BASE-VER WANT_COMPILER_VERSION!= \ awk -F. '{print $$1 * 10000 + $$2 * 100 + $$3}' \ ${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown .endif .export WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_VERSION .endif # !defined(WANT_COMPILER_FREEBSD_VERSION) # It needs to be the same revision as we would build for the bootstrap. # If the expected vs CC is different then we can't skip. # GCC cannot be used for cross-arch yet. For clang we pass -target later if # TARGET_ARCH!=MACHINE_ARCH. .if ${MK_SYSTEM_COMPILER} == "yes" && \ defined(WANT_COMPILER_FREEBSD_VERSION) && \ (${MK_CLANG_BOOTSTRAP} == "yes" || ${MK_GCC_BOOTSTRAP} == "yes") && \ !make(xdev*) && \ ${X_COMPILER_TYPE} == ${WANT_COMPILER_TYPE} && \ (${X_COMPILER_TYPE} == "clang" || ${TARGET_ARCH} == ${MACHINE_ARCH}) && \ ${X_COMPILER_VERSION} == ${WANT_COMPILER_VERSION} && \ ${X_COMPILER_FREEBSD_VERSION} == ${WANT_COMPILER_FREEBSD_VERSION} # Everything matches, disable the bootstrap compiler. MK_CLANG_BOOTSTRAP= no MK_GCC_BOOTSTRAP= no USING_SYSTEM_COMPILER= yes .endif # ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE} # WITH_SYSTEM_LD - Pull in needed values and make a decision. # Check if there is a local linker that can satisfy as an external linker. 
# Which linker is expected to be used? .if ${MK_LLD_BOOTSTRAP} == "yes" WANT_LINKER_TYPE= lld .elif ${MK_BINUTILS_BOOTSTRAP} == "yes" # Note that there's no support for bfd in WITH_SYSTEM_LINKER. WANT_LINKER_TYPE= bfd .else WANT_LINKER_TYPE= .endif .if !defined(WANT_LINKER_FREEBSD_VERSION) && !make(showconfig) && \ !make(test-system-compiler) .if ${WANT_LINKER_TYPE} == "lld" WANT_LINKER_FREEBSD_VERSION_FILE= lib/clang/include/lld/Common/Version.inc WANT_LINKER_FREEBSD_VERSION!= \ awk '$$2 == "LLD_REVISION_STRING" {gsub(/"/, "", $$3); print $$3}' \ ${SRCDIR}/${WANT_LINKER_FREEBSD_VERSION_FILE} || echo unknown WANT_LINKER_VERSION_FILE= lib/clang/include/lld/Common/Version.inc WANT_LINKER_VERSION!= \ awk '$$2 == "LLD_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \ ${SRCDIR}/${WANT_LINKER_VERSION_FILE} || echo unknown .else WANT_LINKER_FREEBSD_VERSION_FILE= WANT_LINKER_FREEBSD_VERSION= .endif .export WANT_LINKER_FREEBSD_VERSION WANT_LINKER_VERSION .endif # !defined(WANT_LINKER_FREEBSD_VERSION) .if ${MK_SYSTEM_LINKER} == "yes" && \ defined(WANT_LINKER_FREEBSD_VERSION) && \ (${MK_LLD_BOOTSTRAP} == "yes") && \ !make(xdev*) && \ ${X_LINKER_TYPE} == ${WANT_LINKER_TYPE} && \ ${X_LINKER_VERSION} == ${WANT_LINKER_VERSION} && \ ${X_LINKER_FREEBSD_VERSION} == ${WANT_LINKER_FREEBSD_VERSION} # Everything matches, disable the bootstrap linker. MK_LLD_BOOTSTRAP= no USING_SYSTEM_LINKER= yes .endif # ${WANT_LINKER_TYPE} == ${LINKER_TYPE} # WITH_SYSTEM_COMPILER / WITH_SYSTEM_LINKER - Handle defaults and debug. USING_SYSTEM_COMPILER?= no USING_SYSTEM_LINKER?= no TEST_SYSTEM_COMPILER_VARS= \ USING_SYSTEM_COMPILER MK_SYSTEM_COMPILER \ MK_CROSS_COMPILER MK_CLANG_BOOTSTRAP MK_GCC_BOOTSTRAP \ WANT_COMPILER_TYPE WANT_COMPILER_VERSION WANT_COMPILER_VERSION_FILE \ WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_FREEBSD_VERSION_FILE \ CC COMPILER_TYPE COMPILER_FEATURES COMPILER_VERSION \ COMPILER_FREEBSD_VERSION \ XCC X_COMPILER_TYPE X_COMPILER_FEATURES X_COMPILER_VERSION \ X_COMPILER_FREEBSD_VERSION TEST_SYSTEM_LINKER_VARS= \ USING_SYSTEM_LINKER MK_SYSTEM_LINKER \ MK_LLD_BOOTSTRAP MK_BINUTILS_BOOTSTRAP \ WANT_LINKER_TYPE WANT_LINKER_VERSION WANT_LINKER_VERSION_FILE \ WANT_LINKER_FREEBSD_VERSION WANT_LINKER_FREEBSD_VERSION_FILE \ LD LINKER_TYPE LINKER_FEATURES LINKER_VERSION \ LINKER_FREEBSD_VERSION \ XLD X_LINKER_TYPE X_LINKER_FEATURES X_LINKER_VERSION \ X_LINKER_FREEBSD_VERSION .for _t in compiler linker test-system-${_t}: .PHONY .for v in ${TEST_SYSTEM_${_t:tu}_VARS} ${_+_}@printf "%-35s= %s\n" "${v}" "${${v}}" .endfor .endfor .if (make(buildworld) || make(buildkernel) || make(kernel-toolchain) || \ make(toolchain) || make(_cross-tools)) .if ${USING_SYSTEM_COMPILER} == "yes" .info SYSTEM_COMPILER: Determined that CC=${CC} matches the source tree. Not bootstrapping a cross-compiler. .elif ${MK_CLANG_BOOTSTRAP} == "yes" .info SYSTEM_COMPILER: libclang will be built for bootstrapping a cross-compiler. .endif .if ${USING_SYSTEM_LINKER} == "yes" .info SYSTEM_LINKER: Determined that LD=${LD} matches the source tree. Not bootstrapping a cross-linker. .elif ${MK_LLD_BOOTSTRAP} == "yes" .info SYSTEM_LINKER: libclang will be built for bootstrapping a cross-linker. .endif .endif # End WITH_SYSTEM_COMPILER / WITH_SYSTEM_LD # Store some compiler metadata for use in installworld where we don't # want to invoke CC at all. 
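# The decisions above can be inspected with "make test-system-compiler" or
# "make test-system-linker", which print the variables listed in
# TEST_SYSTEM_COMPILER_VARS / TEST_SYSTEM_LINKER_VARS.  A sketch of the
# generated ${OBJTOP}/toolchain-metadata.mk (values are illustrative, from a
# hypothetical clang 9 build):
#   _LOADED_TOOLCHAIN_METADATA=t
#   COMPILER_TYPE=clang
#   COMPILER_VERSION=90000
#   X_COMPILER_TYPE=clang
#   .export COMPILER_VERSION COMPILER_TYPE ...
# installworld then includes this file instead of probing ${CC} again.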
_TOOLCHAIN_METADATA_VARS= COMPILER_VERSION \ COMPILER_TYPE \ COMPILER_FEATURES \ COMPILER_FREEBSD_VERSION \ LINKER_VERSION \ LINKER_FEATURES \ LINKER_TYPE \ LINKER_FREEBSD_VERSION toolchain-metadata.mk: .PHONY .META @: > ${.TARGET} @echo ".info Using cached toolchain metadata from build at $$(hostname) on $$(date)" \ > ${.TARGET} @echo "_LOADED_TOOLCHAIN_METADATA=t" >> ${.TARGET} .for v in ${_TOOLCHAIN_METADATA_VARS} @echo "${v}=${${v}}" >> ${.TARGET} @echo "X_${v}=${X_${v}}" >> ${.TARGET} .endfor @echo ".export ${_TOOLCHAIN_METADATA_VARS}" >> ${.TARGET} @echo ".export ${_TOOLCHAIN_METADATA_VARS:C,^,X_,}" >> ${.TARGET} # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the users system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDANT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if ${_DIR} == ".WAIT" || (empty(_REDUNDANT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile)) SUBDIR+= ${_DIR} .endif .endfor .if !defined(NO_ROOT) && (make(installworld) || make(install)) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif .if ${MK_BOOT} != "no" SUBDIR+=stand .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif # Local directories are built in parallel with the base system directories. # Users may insert a .WAIT directive at the beginning or elsewhere within # the LOCAL_DIRS and LOCAL_LIB_DIRS lists as needed. .for _DIR in ${LOCAL_DIRS} .if ${_DIR} == ".WAIT" || exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. .if make(installworld) || make(install) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif .if defined(WORLDFAST) NO_CLEAN= t NO_OBJWALK= t .endif .if ${MK_META_MODE} == "yes" # If filemon is used then we can rely on the build being incremental-safe. # The .meta files will also track the build command and rebuild should # it change. 
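# META_MODE itself is an opt-in; a minimal sketch of enabling it (assuming
# the filemon(4) module is available on the build host) is:
#   kldload filemon
#   echo 'WITH_META_MODE=yes' >> /etc/src-env.conf
# With filemon loaded, .MAKE.MODE does not contain "nofilemon" and the
# clean step is skipped on incremental rebuilds.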
.if empty(.MAKE.MODE:Mnofilemon) NO_CLEAN= t .endif .endif .if defined(NO_OBJWALK) || ${MK_AUTO_OBJ} == "yes" NO_OBJWALK= t NO_KERNELOBJ= t .endif .if !defined(NO_OBJWALK) _obj= obj .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif .if !defined(_MKSHOWCONFIG) .if !defined(SVN_CMD) || empty(SVN_CMD) . for _P in /usr/bin /usr/local/bin . for _S in svn svnlite . if exists(${_P}/${_S}) SVN_CMD= ${_P}/${_S} . endif . endfor . endfor .export SVN_CMD .endif SVNFLAGS?= -r HEAD .if !defined(VCS_REVISION) || empty(VCS_REVISION) .if !defined(SVNVERSION_CMD) || empty(SVNVERSION_CMD) . for _D in ${PATH:S,:, ,g} . if exists(${_D}/svnversion) SVNVERSION_CMD?=${_D}/svnversion . endif . if exists(${_D}/svnliteversion) SVNVERSION_CMD?=${_D}/svnliteversion . endif . endfor .endif _VCS_REVISION?= $$(eval ${SVNVERSION_CMD} ${SRCDIR}) . if !empty(_VCS_REVISION) VCS_REVISION= $$(echo r${_VCS_REVISION}) . endif .export VCS_REVISION .endif .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(_REVISION) _REVISION!= ${MAKE} -C ${SRCDIR}/release MK_AUTO_OBJ=no -V REVISION .export _REVISION .endif .if !defined(_BRANCH) _BRANCH!= ${MAKE} -C ${SRCDIR}/release MK_AUTO_OBJ=no -V BRANCH .export _BRANCH .endif .if !defined(SRCRELDATE) SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .export SRCRELDATE .endif .if !defined(VERSION) VERSION= FreeBSD ${_REVISION}-${_BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) .if ${_BRANCH:MSTABLE*} || ${_BRANCH:MCURRENT*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .elif ${_BRANCH:MALPHA*} EXTRA_REVISION= _${_BRANCH:C/-ALPHA/.a/} .elif ${_BRANCH:MBETA*} EXTRA_REVISION= _${_BRANCH:C/-BETA/.b/} .elif ${_BRANCH:MRC*} EXTRA_REVISION= _${_BRANCH:C/-RC/.r/} .elif ${_BRANCH:MPRERELEASE*} EXTRA_REVISION= _${_BRANCH:C/-PRERELEASE/.p/} .elif ${_BRANCH:M*-p*} EXTRA_REVISION= _${_BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${_REVISION}${EXTRA_REVISION} .endif .endif # !defined(PKG_VERSION) .if !defined(_MKSHOWCONFIG) _CPUTYPE!= MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} -f /dev/null \ -m ${.CURDIR}/share/mk MK_AUTO_OBJ=no -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. .endif .endif WORLDTMP?= ${OBJTOP}/tmp BPATH= ${CCACHE_WRAPPER_PATH_PFX}${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin # When building we want to find the cross tools before the host tools in ${BPATH}. # We also need to add UNIVERSE_TOOLCHAIN_PATH so that we can find the shared # toolchain files (clang, lld, etc.) during make universe/tinderbox STRICTTMPPATH= ${XPATH}:${BPATH}:${UNIVERSE_TOOLCHAIN_PATH} # We should not be using tools from /usr/bin accidentally since this could cause # the build to break on other systems that don't have that tool. For now we # still allow using the old behaviour (inheriting $PATH) if # BUILD_WITH_STRICT_TMPPATH is set to 0 but this will eventually be removed. 
# Currently strict $PATH can cause build failures and does not work yet with # USING_SYSTEM_LINKER/USING_SYSTEM_COMPILER. Once these issues have been # resolved it will be turned on by default. BUILD_WITH_STRICT_TMPPATH?=0 .if ${BUILD_WITH_STRICT_TMPPATH} != 0 TMPPATH= ${STRICTTMPPATH} .else TMPPATH= ${STRICTTMPPATH}:${PATH} .endif # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. # .if make(distributeworld) || make(installworld) || make(stageworld) .if ${BUILD_WITH_STRICT_TMPPATH} != 0 MKTEMP=${WORLDTMP}/legacy/usr/bin/mktemp .if !exists(${MKTEMP}) .error "mktemp binary doesn't exist in expected location: ${MKTEMP}" .endif .else MKTEMP=mktemp .endif INSTALLTMP!= ${MKTEMP} -d -u -t install .endif .if make(stagekernel) || make(distributekernel) TAGS+= kernel PACKAGE= kernel .endif # # Building a world goes through the following stages # # 1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Keep these in sync MINIMUM_SUPPORTED_OSREL?= 1002501 MINIMUM_SUPPORTED_REL?= 10.3 # Common environment for world related stages CROSSENV+= \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. 
CROSSENV+= BUILD_TOOLS_META=.NOMETA .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif BOOTSTRAPPING_OSRELDATE?=${OSRELDATE} # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ TOOLS_PREFIX=${TOOLS_PREFIX_UNDEF:U${WORLDTMP}} \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ OBJTOP='${WORLDTMP}/obj-tools' \ OBJROOT='$${OBJTOP}/' \ MAKEOBJDIRPREFIX= \ BOOTSTRAPPING=${BOOTSTRAPPING_OSRELDATE} \ BWPHASE=${.TARGET:C,^_,,} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_RETPOLINE=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} .if empty(.MAKEOVERRIDES:MMK_LLVM_TARGET_ALL) BMAKE+= MK_LLVM_TARGET_ALL=no .endif # build-tools stage TMAKE= \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${BOOTSTRAPPING_OSRELDATE} \ BWPHASE=${.TARGET:C,^_,,} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_RETPOLINE=no MK_TESTS=no # cross-tools stage # TOOLS_PREFIX set in BMAKE XMAKE= ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= \ TOOLS_PREFIX=${TOOLS_PREFIX_UNDEF:U${WORLDTMP}} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ OBJTOP='${WORLDTMP}/obj-kernel-tools' \ OBJROOT='$${OBJTOP}/' \ MAKEOBJDIRPREFIX= \ BOOTSTRAPPING=${BOOTSTRAPPING_OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_RETPOLINE=no MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} \ SYSROOT=${WORLDTMP} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCXXFLAGS} ${XCFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" LLVM_LINK="${XLLVM_LINK}" \ NM=${XNM} OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for its # tools so we don't need to tell it where to look. BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif # The internal bootstrap compiler has a default sysroot set by TOOLS_PREFIX # and target set by TARGET/TARGET_ARCH. However, there are several needs to # always pass an explicit --sysroot and -target. # - External compiler needs sysroot and target flags. # - External ld needs sysroot. # - To be clear about the use of a sysroot when using the internal compiler. # - Easier debugging. # - Allowing WITH_SYSTEM_COMPILER+WITH_META_MODE to work together due to # the flip-flopping build command when sometimes using external and # sometimes using internal. # - Allow using lld which has no support for default paths. 
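# The net effect (an illustrative aarch64 case; the exact triple comes from
# TARGET_TRIPLE above) is that world is compiled with something like:
#   cc -target aarch64-unknown-freebsd13.0 --sysroot=${WORLDTMP} \
#       -B${WORLDTMP}/usr/bin ...
# whether the compiler is the bootstrapped one or an external toolchain.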
.if !defined(CROSS_BINUTILS_PREFIX) || !exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${WANT_COMPILER_TYPE} == gcc || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) .elif ${WANT_COMPILER_TYPE} == clang || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == clang) XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} .if !empty(BFLAGS) XCFLAGS+= ${BFLAGS} .endif .if ${MK_LIB32} != "no" && (${TARGET_ARCH} == "amd64" || \ ${TARGET_ARCH} == "powerpc64" || ${TARGET_ARCH:Mmips64*} != "") LIBCOMPAT= 32 .include "Makefile.libcompat" .elif ${MK_LIBSOFT} != "no" && ${TARGET_ARCH:Marmv[67]*} != "" LIBCOMPAT= SOFT .include "Makefile.libcompat" .endif # META_MODE normally ignores host file changes since every build updates # timestamps (see NO_META_IGNORE_HOST in sys.mk). There are known times # when the ABI breaks though that we want to force rebuilding WORLDTMP # to get updated host tools. .if ${MK_META_MODE} == "yes" && defined(NO_CLEAN) && \ !defined(NO_META_IGNORE_HOST) && !defined(NO_META_IGNORE_HOST_HEADERS) && \ !defined(_MKSHOWCONFIG) # r318736 - ino64 major ABI breakage META_MODE_BAD_ABI_VERS+= 1200031 .if !defined(OBJDIR_HOST_OSRELDATE) .if exists(${OBJTOP}/host-osreldate.h) OBJDIR_HOST_OSRELDATE!= \ awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${OBJTOP}/host-osreldate.h .elif exists(${WORLDTMP}/usr/include/osreldate.h) OBJDIR_HOST_OSRELDATE= 0 .endif .export OBJDIR_HOST_OSRELDATE .endif # Note that this logic is the opposite of normal BOOTSTRAP handling. We want # to compare the WORLDTMP's OSRELDATE to the host's OSRELDATE. If the WORLDTMP # is older than the ABI-breakage OSRELDATE of the HOST then we rebuild. .if defined(OBJDIR_HOST_OSRELDATE) .for _ver in ${META_MODE_BAD_ABI_VERS} .if ${OSRELDATE} >= ${_ver} && ${OBJDIR_HOST_OSRELDATE} < ${_ver} _meta_mode_need_rebuild= ${_ver} .endif .endfor .if defined(_meta_mode_need_rebuild) .info META_MODE: Rebuilding host tools due to ABI breakage in __FreeBSD_version ${_meta_mode_need_rebuild}. NO_META_IGNORE_HOST_HEADERS= 1 .export NO_META_IGNORE_HOST_HEADERS .endif # defined(_meta_mode_need_rebuild) .endif # defined(OBJDIR_HOST_OSRELDATE) .endif # ${MK_META_MODE} == "yes" && defined(NO_CLEAN) ... # This is only used for META_MODE+filemon to track what the oldest # __FreeBSD_version is in WORLDTMP. This purposely does NOT have # a make dependency on /usr/include/osreldate.h as the file should # only be copied when it is missing or meta mode determines it has changed. # Since host files are normally ignored without NO_META_IGNORE_HOST # the file will never be updated unless that flag is specified. This # allows tracking the oldest osreldate to force rebuilds via # META_MODE_BADABI_REVS above. host-osreldate.h: # DO NOT ADD /usr/include/osreldate.h here @cp -f /usr/include/osreldate.h ${.TARGET} WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ BWPHASE=${.TARGET:C,^_,,} \ DESTDIR=${WORLDTMP} IMAKEENV= ${CROSSENV} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif # When generating install media, do not allow user and group information from # the build host to affect the contents of the distribution. 
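# Concretely (an illustrative invocation, not the exact command used below),
# DB_FROM_SRC makes install(1) and mtree(8) resolve users and groups against
# the source tree instead of the host databases, e.g.:
#   install -N ${.CURDIR}/etc -o root -g wheel -m 555 prog ${DESTDIR}/usr/bin/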
.if make(distributeworld) || make(distrib-dirs) || make(distribution) DB_FROM_SRC= yes .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG METALOG:= ${METALOG:C,//+,/,g} IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(BUILD_PKGS) INSTALLFLAGS+= -h sha256 .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif DESTDIR_MTREEFLAGS= -deU # When creating worldtmp we don't need to set the directories as owned by root # so we also pass -W WORLDTMP_MTREEFLAGS= -deUW .if defined(NO_ROOT) # When building with -DNO_ROOT we shouldn't be changing the directories # that are created by mtree to be owned by root/wheel. DESTDIR_MTREEFLAGS+= -W .endif MTREE?= mtree .if ${BUILD_WITH_STRICT_TMPPATH} != 0 MTREE= ${WORLDTMP}/legacy/usr/sbin/mtree .endif WORLDTMP_MTREE= ${MTREE} ${WORLDTMP_MTREEFLAGS} DESTDIR_MTREE= ${MTREE} ${DESTDIR_MTREEFLAGS} # kernel stage KMAKEENV= ${WMAKEENV:NSYSROOT=*} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _sanity_check: .PHONY .MAKE .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of it's files. @echo @echo "*** Error: path to source tree contains a comma ','" @echo @false .elif ${.CURDIR:M*\:*} != "" # Using ':' leaks into PATH and breaks finding cross-tools. @echo @echo "*** Error: path to source tree contains a colon ':'" @echo @false .endif # Our current approach to dependency tracking cannot cope with certain source # tree changes, particularly with respect to removing source files and # replacing generated files. Handle these cases here in an ad-hoc fashion. 
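# Each hack below follows the same shape, sketched here for a hypothetical
# libc file foo that moved from foo.S to foo.c: if the recorded dependencies
# still name the removed source, delete the stale .depend files so make
# regenerates them:
#   if egrep -qw 'foo\.[sS]' ${OBJTOP}/lib/libc/.depend.foo.o; then
#           rm -f ${OBJTOP}/lib/libc/.depend.foo.*
#   fi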
_cleanobj_fast_depend_hack: .PHONY
# Syscall stubs rewritten in C and obsolete MD assembly implementations
# Date      SVN Rev      Syscalls
# 20180604  r334626      brk sbrk
.for f in brk sbrk
	@if [ -e "${OBJTOP}/lib/libc/.depend.${f}.o" ] && \
	    egrep -qw '${f}\.[sS]' ${OBJTOP}/lib/libc/.depend.${f}.o; then \
		echo "Removing stale dependencies for ${f} syscall wrappers"; \
		rm -f ${OBJTOP}/lib/libc/.depend.${f}.* \
		    ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \
	fi
.endfor
# 20181013  r339348  bcopy reimplemented as .c
.for f in bcopy memcpy memmove
	@if [ -e "${OBJTOP}/lib/libc/.depend.${f}.o" ] && \
	    egrep -qw 'bcopy\.[sS]' ${OBJTOP}/lib/libc/.depend.${f}.o; then \
		echo "Removing stale dependencies for bcopy"; \
		rm -f ${OBJTOP}/lib/libc/.depend.${f}.* \
		    ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \
	fi
.endfor
# 20181115  r340463  bzero reimplemented as .c
	@if [ -e "${OBJTOP}/lib/libc/.depend.bzero.o" ] && \
	    egrep -qw 'bzero\.[sS]' ${OBJTOP}/lib/libc/.depend.bzero.o; then \
		echo "Removing stale dependencies for bzero"; \
		rm -f ${OBJTOP}/lib/libc/.depend.bzero.* \
		    ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.bzero.*}; \
	fi
# 20181009  track migration from ntp's embedded libevent to updated one
	@if [ -e "${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o" ] && \
	    egrep -q 'contrib/ntp/sntp/libevent/bufferevent_openssl.c' \
	    ${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o ; then \
		echo "Removing stale libevent dependencies"; \
		rm -f ${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.*; \
	fi
# 20181209  r341759  track migration across wpa update
	@if [ -e "${OBJTOP}/usr.sbin/wpa/wpa_supplicant/.depend.rrm.o" ] && \
	    egrep -q 'src/ap/rrm.c' \
	    ${OBJTOP}/usr.sbin/wpa/wpa_supplicant/.depend.rrm.o; then \
		echo "Removing stale wpa dependencies"; \
		rm -f ${OBJTOP}/usr.sbin/wpa/*/.depend*; \
	fi

_worldtmp: .PHONY
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> Rebuilding the temporary build tree"
	@echo "--------------------------------------------------------------"
.if !defined(NO_CLEAN)
	rm -rf ${WORLDTMP}
.else
# Note: for delete-old we need to set $PATH to also include the host $PATH
# since otherwise a partial build with missing symlinks in ${WORLDTMP}/legacy/
# will fail to run due to missing binaries. $WMAKE sets PATH to only ${TMPPATH}
# so we remove that assignment from $WMAKE and prepend the new $PATH
	${_+_}@if [ -e "${WORLDTMP}" ]; then \
		echo ">>> Deleting stale files in build tree..."; \
		cd ${.CURDIR}; env PATH=${TMPPATH}:${PATH} ${WMAKE:NPATH=*} \
		    _NO_INCLUDE_COMPILERMK=t -DBATCH_DELETE_OLD_FILES delete-old \
		    delete-old-libs >/dev/null; \
	fi
	rm -rf ${WORLDTMP}/legacy/usr/include
.if ${USING_SYSTEM_COMPILER} == "yes"
.for cc in cc c++
	if [ -x ${WORLDTMP}/usr/bin/${cc} ]; then \
		inum=$$(stat -f %i ${WORLDTMP}/usr/bin/${cc}); \
		find ${WORLDTMP}/usr/bin -inum $${inum} -delete; \
	fi
.endfor
.endif	# ${USING_SYSTEM_COMPILER} == "yes"
.if ${USING_SYSTEM_LINKER} == "yes"
	@rm -f ${WORLDTMP}/usr/bin/ld ${WORLDTMP}/usr/bin/ld.lld
.endif	# ${USING_SYSTEM_LINKER} == "yes"
.endif	# !defined(NO_CLEAN)
	@mkdir -p ${WORLDTMP}
	@touch ${WORLDTMP}/${.TARGET}
# We can't use mtree to create the worldtmp directories since it may not be
# available on the target system (this happens e.g.
when building on non-FreeBSD) cd ${.CURDIR}/tools/build; \ ${MAKE} DIRPRFX=tools/build/ DESTDIR=${WORLDTMP}/legacy installdirs # In order to build without inheriting $PATH we need to add symlinks to the host # tools in $WORLDTMP for the tools that we don't build during bootstrap-tools cd ${.CURDIR}/tools/build; \ ${MAKE} DIRPRFX=tools/build/ DESTDIR=${WORLDTMP}/legacy host-symlinks _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools mkdir -p ${WORLDTMP}/usr ${WORLDTMP}/lib/casper ${WORLDTMP}/lib/geom ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .for _mtree in ${LOCAL_MTREE} ${WORLDTMP_MTREE} -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" # Avoid including bsd.compiler.mk in clean and obj with _NO_INCLUDE_COMPILERMK # since the restricted $PATH might not contain a valid cc binary ${_+_}cd ${.CURDIR}; ${WMAKE} _NO_INCLUDE_COMPILERMK=t ${CLEANDIR} .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${LIBCOMPATWMAKE} _NO_INCLUDE_COMPILERMK=t -f Makefile.inc1 ${CLEANDIR} .endif .else ${_+_}cd ${.CURDIR}; ${WMAKE} _NO_INCLUDE_COMPILERMK=t _cleanobj_fast_depend_hack .endif # !defined(NO_CLEAN) _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} _NO_INCLUDE_COMPILERMK=t obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" @rm -f ${OBJTOP}/toolchain-metadata.mk ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _build-metadata: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: recording build metadata" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} toolchain-metadata.mk ${_+_}cd ${.CURDIR}; ${WMAKE} host-osreldate.h _includes: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. 
# Do SUBDIR_OVERRIDE includes last.
	${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \
	    MK_INCLUDES=yes includes
.if !empty(SUBDIR_OVERRIDE) && make(buildworld)
	${_+_}cd ${.CURDIR}; ${WMAKE} MK_INCLUDES=yes SHARED=symlinks includes
.endif

_libraries:
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> stage 4.2: building libraries"
	@echo "--------------------------------------------------------------"
	${_+_}cd ${.CURDIR}; \
	    ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \
	    MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries

everything: .PHONY
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> stage 4.3: building everything"
	@echo "--------------------------------------------------------------"
	${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all

WMAKE_TGTS=
.if !defined(WORLDFAST)
WMAKE_TGTS+=	_sanity_check _worldtmp _legacy
.if empty(SUBDIR_OVERRIDE)
WMAKE_TGTS+=	_bootstrap-tools
.endif
WMAKE_TGTS+=	_cleanobj
.if !defined(NO_OBJWALK)
WMAKE_TGTS+=	_obj
.endif
WMAKE_TGTS+=	_build-tools _cross-tools
WMAKE_TGTS+=	_build-metadata
WMAKE_TGTS+=	_includes
.endif
.if !defined(NO_LIBS)
WMAKE_TGTS+=	_libraries
.endif
WMAKE_TGTS+=	everything
.if defined(LIBCOMPAT) && empty(SUBDIR_OVERRIDE)
WMAKE_TGTS+=	build${libcompat}
.endif

# record buildworld time in seconds
.if make(buildworld)
_BUILDWORLD_START!= date '+%s'
.export _BUILDWORLD_START
.endif

buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY
.ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue

buildworld_prologue: .PHONY
	@echo "--------------------------------------------------------------"
	@echo ">>> World build started on `LC_ALL=C date`"
	@echo "--------------------------------------------------------------"

buildworld_epilogue: .PHONY
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> World build completed on `LC_ALL=C date`"
	@seconds=$$(($$(date '+%s') - ${_BUILDWORLD_START})); \
	  echo -n ">>> World built in $$seconds seconds, "; \
	  echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}"
	@echo "--------------------------------------------------------------"

#
# We need to have this as a target because the indirection between Makefile
# and Makefile.inc1 causes the correct PATH to be used, rather than a
# modification of the current environment's PATH.  In addition, we need
# to quote multiword values.
#
buildenvvars: .PHONY
	@echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@}

.if ${.TARGETS:Mbuildenv}
.if ${.MAKEFLAGS:M-j}
.error The buildenv target is incompatible with -j
.endif
.endif
BUILDENV_DIR?=	${.CURDIR}
#
# Note: make will report any errors the shell reports.  This can
# be odd if the last command in an interactive shell generates an
# error or is terminated by SIGINT.  These reported errors look bad,
# but are harmless.  Allowing them also allows BUILDENV_SHELL to
# be a complex command whose status will be returned to the caller.
# Some scripts in tools rely on this behavior to report build errors.
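# An illustrative buildenv session, reusing the toolchain and sysroot from an
# earlier buildworld to rebuild a single directory:
#   make TARGET=arm64 TARGET_ARCH=aarch64 buildenv
#   cd bin/ls && make clean all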
# buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} TOOLCHAIN_TGTS= ${WMAKE_TGTS:Neverything:Nbuild${libcompat}} toolchain: ${TOOLCHAIN_TGTS} .PHONY KERNEL_TOOLCHAIN_TGTS= ${TOOLCHAIN_TGTS:N_obj:N_cleanobj:N_includes:N_libraries} .if make(kernel-toolchain) .ORDER: ${KERNEL_TOOLCHAIN_TGTS} .endif kernel-toolchain: ${KERNEL_TOOLCHAIN_TGTS} .PHONY # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel .PHONY _installcheck_world: .PHONY @echo "--------------------------------------------------------------" @echo ">>> Install check world" @echo "--------------------------------------------------------------" _installcheck_kernel: .PHONY @echo "--------------------------------------------------------------" @echo ">>> Install check kernel" @echo "--------------------------------------------------------------" # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .PHONY .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit CHECK_UIDS+= ntpd CHECK_GIDS+= ntpd CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp CHECK_UIDS+= unbound CHECK_GIDS+= unbound _installcheck_world: __installcheck_UGID __installcheck_UGID: .PHONY .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # If installing over the running system (DESTDIR is / or unset) and the install # includes rescue, try running rescue from the objdir as a sanity check. If # rescue is not functional (e.g., because it depends on a system call not # supported by the currently running kernel), abort the installation. # .if !make(distributeworld) && ${MK_RESCUE} != "no" && \ (empty(DESTDIR) || ${DESTDIR} == "/") && empty(BYPASS_INSTALLCHECK_SH) _installcheck_world: __installcheck_sh_check __installcheck_sh_check: .PHONY @if [ "`${OBJTOP}/rescue/rescue/rescue sh -c 'echo OK'`" != \ OK ]; then \ echo "rescue/sh check failed, installation aborted" >&2; \ false; \ fi .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh sort strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN_UTILS} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. 
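# The usual upgrade sequence (shown only as an illustration; see build(7) and
# src/UPDATING for the authoritative procedure) is roughly:
#   make buildworld
#   make buildkernel KERNCONF=GENERIC
#   make installkernel KERNCONF=GENERIC
#   (reboot)
#   make installworld
#   etcupdate (or mergemaster)
#   make delete-old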
# # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= .if defined(LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world .PHONY mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." >&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) -mkdir -p ${METALOG:H} echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if defined(LIBCOMPAT) ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs ${INSTALL_SYMLINK} ${INSTALLFLAGS} usr/src/sys ${INSTALL_DDIR}/base/sys .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -type d -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. 
This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .PHONY .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor _sysent_dirs= sys/kern _sysent_dirs+= sys/compat/freebsd32 _sysent_dirs+= sys/amd64/linux \ sys/amd64/linux32 \ sys/arm64/linux \ sys/i386/linux sysent: .PHONY .for _dir in ${_sysent_dirs} ${_+_}${MAKE} -C ${.CURDIR}/${_dir} sysent .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. 
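# For example (host name and mount points are illustrative):
#   mount -t nfs buildhost:/usr/src /usr/src
#   mount -t nfs buildhost:/usr/obj /usr/obj
#   cd /usr/src && make reinstall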
# restage reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy .if make(restage) @echo "--------------------------------------------------------------" @echo ">>> Making distribution" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} distribution .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install${libcompat} .endif @echo "--------------------------------------------------------------" @echo ">>> Installing everything completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute${libcompat} \ DISTRIBUTION=lib${libcompat} .endif distrib-dirs distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} .if make(distribution) ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} MK_TESTS=no installconfig .endif # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. # Only the existing (depending TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected) to # be set to cross-build, we have to make sure TARGET is set # properly. .if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTOP}${KERNSRCDIR:C,^${.CURDIR},,} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. 
BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if !defined(_MKSHOWCONFIG) && exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .else .if make(buildkernel) .error Missing KERNCONF ${KERNCONFDIR}/${_kernel} .endif .endif .endfor _cleankernobj_fast_depend_hack: .PHONY # 20180320 remove stale generated assym.s after renaming to .inc in r331254 @if [ -e "${OBJTOP}/sys/${KERNCONF}/assym.s" ]; then \ echo "Removing stale generated assym files"; \ rm -f ${OBJTOP}/sys/${KERNCONF}/assym.* \ ${OBJTOP}/sys/${KERNCONF}/.depend.assym.*; \ fi ${WMAKE_TGTS:N_worldtmp:Nbuild${libcompat}} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # record kernel(s) build time in seconds .if make(buildkernel) _BUILDKERNEL_START!= date '+%s' .endif # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .else ${_+_}cd ${.CURDIR}; ${WMAKE} _cleankernobj_fast_depend_hack .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor @seconds=$$(($$(date '+%s') - ${_BUILDKERNEL_START})); \ echo -n ">>> Kernel(s) ${BUILDKERNELS} built in $$seconds seconds, "; \ echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}" @echo "--------------------------------------------------------------" NO_INSTALLEXTRAKERNELS?= yes # # installkernel, etc. 
# # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL} on $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL} completed on $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel} $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel} completed on $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" .endfor .endif distributekernel distributekernel.debug: .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif ${_+_}cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif ${_+_}cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e "s|^./kernel.${_kernel}|.|" \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .PHONY .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > 
${PACKAGEDIR}/kernel.${_kernel}.txz .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endif .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endif .endfor .endif .endif stagekernel: .PHONY ${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel PORTSDIR?= /usr/ports WSTAGEDIR?= ${OBJTOP}/worldstage KSTAGEDIR?= ${OBJTOP}/kernelstage REPODIR?= ${OBJROOT}repo PKGSIGNKEY?= # empty .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages .ORDER: create-packages create-kernel-packages .ORDER: create-packages sign-packages _pkgbootstrap: .PHONY .if make(*package*) && !exists(${LOCALBASE}/sbin/pkg) @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif packages: .PHONY ${_+_}${MAKE} -C ${.CURDIR} PKG_VERSION=${PKG_VERSION} real-packages package-pkg: .PHONY rm -rf /tmp/ports.${TARGET} || : env ${WMAKEENV:Q} SRCDIR=${.CURDIR} PORTSDIR=${PORTSDIR} REVISION=${_REVISION} \ PKG_CMD=${PKG_CMD} PKG_VERSION=${PKG_VERSION} REPODIR=${REPODIR} \ WSTAGEDIR=${WSTAGEDIR} \ sh ${.CURDIR}/release/scripts/make-pkg-package.sh real-packages: stage-packages create-packages sign-packages .PHONY stage-packages-world: .PHONY @mkdir -p ${WSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} -DNO_ROOT stageworld stage-packages-kernel: .PHONY @mkdir -p ${KSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${KSTAGEDIR} -DNO_ROOT stagekernel stage-packages: .PHONY stage-packages-world stage-packages-kernel _repodir: .PHONY @mkdir -p ${REPODIR} create-packages-world: _pkgbootstrap _repodir .PHONY ${_+_}@cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 \ DESTDIR=${WSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} create-world-packages create-packages-kernel: _pkgbootstrap _repodir .PHONY ${_+_}@cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 \ DESTDIR=${KSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} DISTDIR=kernel \ create-kernel-packages create-packages: .PHONY create-packages-world create-packages-kernel create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ env -i LC_COLLATE=C sort ${WSTAGEDIR}/${DISTDIR}/METALOG | \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ echo "_PKGS+= $${pkgname}" ; \ done > ${WSTAGEDIR}/packages.mk ${_+_}@cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 create-world-packages-jobs \ .MAKE.JOB.PREFIX= .if make(create-world-packages-jobs) .include "${WSTAGEDIR}/packages.mk" .endif create-world-packages-jobs: .PHONY .for pkgname in ${_PKGS} create-world-packages-jobs: create-world-package-${pkgname} create-world-package-${pkgname}: .PHONY @sh 
${SRCDIR}/release/packages/generate-ucl.sh -o ${pkgname} \ -s ${SRCDIR} -u ${WSTAGEDIR}/${pkgname}.ucl @awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${WSTAGEDIR}/${pkgname}.ucl @if [ "${pkgname}" == "runtime" ]; then \ sed -i '' -e "s/%VCS_REVISION%/${VCS_REVISION}/" ${WSTAGEDIR}/${pkgname}.ucl ; \ fi ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ create -M ${WSTAGEDIR}/${pkgname}.ucl \ -p ${WSTAGEDIR}/${pkgname}.plist \ -r ${WSTAGEDIR} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} .endfor _default_flavor= -default .if make(*package*) && exists(${KSTAGEDIR}/kernel.meta) . if ${MK_DEBUG_FILES} != "no" _debug=-debug . endif create-kernel-packages: .PHONY . for flavor in "" ${_debug} create-kernel-packages: create-kernel-packages-flavor${flavor:C,^""$,${_default_flavor},} create-kernel-packages-flavor${flavor:C,^""$,${_default_flavor},}: _pkgbootstrap .PHONY @cd ${KSTAGEDIR}/${DISTDIR} ; \ env -i LC_COLLATE=C sort ${KSTAGEDIR}/kernel.meta | \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${INSTALLKERNEL} ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${INSTALLKERNEL:tl}${flavor}/" \ -e "s/%KERNELDIR%/kernel/" \ -e "s/%COMMENT%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/ %VCS_REVISION%/${VCS_REVISION}/" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ -p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \ -r ${KSTAGEDIR}/${DISTDIR} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} . endfor .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" . for _kernel in ${BUILDKERNELS:[2..-1]} . if exists(${KSTAGEDIR}/kernel.${_kernel}.meta) . if ${MK_DEBUG_FILES} != "no" _debug=-debug . endif . 
for flavor in "" ${_debug} create-kernel-packages: create-kernel-packages-extra-flavor${flavor:C,^""$,${_default_flavor},}-${_kernel} create-kernel-packages-extra-flavor${flavor:C,^""$,${_default_flavor},}-${_kernel}: _pkgbootstrap .PHONY @cd ${KSTAGEDIR}/kernel.${_kernel} ; \ env -i LC_COLLATE=C sort ${KSTAGEDIR}/kernel.${_kernel}.meta | \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${_kernel} ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${_kernel:tl}${flavor}/" \ -e "s/%KERNELDIR%/kernel.${_kernel}/" \ -e "s/%COMMENT%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/ %VCS_REVISION%/${VCS_REVISION}/" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ -p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \ -r ${KSTAGEDIR}/kernel.${_kernel} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} . endfor . endif . endfor .endif sign-packages: _pkgbootstrap .PHONY @[ -L "${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/latest" ] && \ unlink ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/latest ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname repo \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} \ ${PKGSIGNKEY} ; \ cd ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI); \ ln -s ${PKG_VERSION} latest # # # checkworld # # Run test suite on installed world. # checkworld: .PHONY @if [ ! -x "${LOCALBASE}/bin/kyua" ]; then \ echo "You need kyua (devel/kyua) to run the test suite." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}PATH="$$PATH:${LOCALBASE}/bin" kyua test -k ${TESTSBASE}/Kyuafile # # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x "${LOCALBASE}/bin/doxygen" ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. # update: .PHONY .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN_CMD} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may like to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware, this is *not* guaranteed to work, you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. 
This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # libnv and libl are both requirements for config(8), which is an unconditional # bootstrap-tool. _config_deps= lib/libnv usr.bin/lex/lib legacy: .PHONY .if ${BOOTSTRAPPING} < ${MINIMUM_SUPPORTED_OSREL} && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to ${MINIMUM_SUPPORTED_REL} are not supported."; \ false .endif .for _tool in tools/build ${_config_deps} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${WORLDTMP}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no all; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no \ DESTDIR=${WORLDTMP}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy, in addition to the libraries built during # bootstrap-tools. # _bt= _bootstrap-tools # We want to run the build with only ${WORLDTMP} in $PATH to ensure we don't # accidentally run tools that are incompatible but happen to be in $PATH. # This is especially important when building on Linux/MacOS where many of the # programs used during the build accept different flags or generate different # output. On those platforms we only symlink the tools known to be compatible # (e.g. basic utilities such as mkdir) into ${WORLDTMP} and build all others # from the FreeBSD sources during the bootstrap-tools stage. # We want to build without the user's $PATH starting in the bootstrap-tools # phase so the tools used in that phase (ln, cp, etc) must have already been # linked to $WORLDTMP. The tools are listed in the _host_tools_to_symlink # variable in tools/build/Makefile and are linked during the legacy phase. # Since they could be Linux or MacOS binaries, too we must only use flags that # are portable across operating systems. # If BOOTSTRAP_ALL_TOOLS is set we will build all the required tools from the # current source tree. Otherwise we create a symlink to the version found in # $PATH during the bootstrap-tools stage. .if defined(BOOTSTRAP_ALL_TOOLS) # BOOTSTRAPPING will be set on the command line so we can't override it here. # Instead set BOOTSTRAPPING_OSRELDATE so that the value 0 is set ${BSARGS} BOOTSTRAPPING_OSRELDATE:= 0 .endif .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif # If we are not building the bootstrap because BOOTSTRAPPING is sufficient # we symlink the host version to $WORLDTMP instead. By doing this we can also # detect when a bootstrap tool is being used without the required MK_FOO. # If you add a new bootstrap tool where we could also use the host version, # please ensure that you also add a .else case where you add the tool to the # _bootstrap_tools_links variable. .if ${BOOTSTRAPPING} < 1000033 _m4= usr.bin/m4 _lex= usr.bin/lex # Note: lex needs m4 to build but m4 also depends on lex. However, lex can be # bootstrapped so we build lex first. 
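# Illustrative: when building on a non-FreeBSD host, or to avoid relying on
# host tools entirely, this is typically enabled on the command line or in
# src.conf, e.g.:
#   make -DBOOTSTRAP_ALL_TOOLS buildworld
# so that everything listed in _basic_bootstrap_tools below is built from the
# tree instead of being symlinked from $PATH.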
${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd ${_bt}-usr.bin/yacc ${_bt}-${_lex} _bt_m4_depend=${_bt}-${_m4} _bt_lex_depend=${_bt}-${_lex} ${_bt_m4_depend} .else _bootstrap_tools_links+=m4 lex .endif # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. # r296685 fix cross-endian objcopy # r310724 fixed PR 215350, a crash in libdwarf with objects built by GCC 6.2. # r334881 added libdwarf constants used by ctfconvert. # r338478 fixed a crash in objcopy for mips64el objects # r339083 libelf: correct mips64el test to use ELF header # r348347 Add missing powerpc64 relocation support to libdwarf .if ${BOOTSTRAPPING} < 1300030 _elftoolchain_libs= lib/libelf lib/libdwarf ${_bt}-lib/libelf: ${_bt_m4_depend} ${_bt}-lib/libdwarf: ${_bt_m4_depend} .endif # r245440 mtree -N support added # r313404 requires sha384.h for libnetbsd, added to libmd in r292782 .if ${BOOTSTRAPPING} < 1100093 _nmtree= lib/libmd \ lib/libnetbsd \ usr.sbin/nmtree ${_bt}-lib/libnetbsd: ${_bt}-lib/libmd ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .else _bootstrap_tools_links+=mtree .endif # r246097: log addition login.conf.db, passwd, pwd.db, and spwd.db with cat -l .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .else _bootstrap_tools_links+=cat .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support .if ${BOOTSTRAPPING} < 1100078 _crunchide= usr.sbin/crunch/crunchide .else _bootstrap_tools_links+=crunchide .endif # r285986 crunchen: use STRIPBIN rather than STRIP # 1100113: Support MK_AUTO_OBJ # 1200006: META_MODE fixes .if ${BOOTSTRAPPING} < 1100078 || \ (${MK_AUTO_OBJ} == "yes" && ${BOOTSTRAPPING} < 1100114) || \ (${MK_META_MODE} == "yes" && ${BOOTSTRAPPING} < 1200006) _crunchgen= usr.sbin/crunch/crunchgen .else _bootstrap_tools_links+=crunchgen .endif # r296926 -P keymap search path, MFC to stable/10 in r298297 .if ${BOOTSTRAPPING} < 1003501 || \ (${BOOTSTRAPPING} >= 1100000 && ${BOOTSTRAPPING} < 1100103) _kbdcontrol= usr.sbin/kbdcontrol .else _bootstrap_tools_links+=kbdcontrol .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif .if ${MK_LOCALES} != "no" _localedef= usr.bin/localedef .endif # We need to build tblgen when we're building clang or lld, either as # bootstrap tools, or as the part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" || \ ${MK_LLD_BOOTSTRAP} != "no" || ${MK_LLD} != "no" _clang_tblgen= \ lib/clang/libllvmminimal \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen \ usr.bin/clang/lldb-tblgen # XXX: lldb-tblgen is not needed, if top-level MK_LLDB=no ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmminimal ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmminimal ${_bt}-usr.bin/clang/lldb-tblgen: ${_bt}-lib/clang/libllvmminimal .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. 
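# (Illustrative) which dtc gets bootstrapped can be flipped from src.conf(5)
# via the GPL_DTC option, e.g. a WITH_GPL_DTC=yes (or WITHOUT_GPL_DTC=yes) line
# in /etc/src.conf changes the MK_GPL_DTC test below.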
_dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_LOCALES} != "no" _localedef= usr.bin/localedef .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .for _tool in ${_kerberos5_bootstrap_tools} ${_bt}-${_tool}: ${_bt}-usr.bin/yacc ${_bt_lex_depend} .endfor .endif ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd # The tools listed in _basic_bootstrap_tools will generally not be # bootstrapped unless BOOTSTRAP_ALL_TOOL is set. However, when building on a # Linux or MacOS host the host versions are incompatible so we need to build # them from the source tree. Usually the link name will be the same as the subdir, # but some directories such as grep or test install multiple binaries. In that # case we use the _basic_bootstrap_tools_multilink variable which is a list of # subdirectory and comma-separated list of files. _basic_bootstrap_tools_multilink=usr.bin/grep grep,egrep,fgrep _basic_bootstrap_tools_multilink+=bin/test test,[ # bootstrap tools needed by buildworld: _basic_bootstrap_tools=usr.bin/awk usr.bin/cut bin/expr usr.bin/gencat \ usr.bin/join usr.bin/mktemp bin/rmdir usr.bin/sed usr.bin/sort \ usr.bin/truncate usr.bin/tsort # elf2aout is required for sparc64 build _basic_bootstrap_tools+=usr.bin/elf2aout # file2c is required for building usr.sbin/config: _basic_bootstrap_tools+=usr.bin/file2c # uuencode/uudecode required for share/tabset _basic_bootstrap_tools+=usr.bin/uuencode usr.bin/uudecode # xargs is required by mkioctls _basic_bootstrap_tools+=usr.bin/xargs # cap_mkdb is required for share/termcap: _basic_bootstrap_tools+=usr.bin/cap_mkdb # ldd is required for installcheck (TODO: just always use /usr/bin/ldd instead?) _basic_bootstrap_tools+=usr.bin/ldd # services_mkdb/pwd_mkdb are required for installworld: _basic_bootstrap_tools+=usr.sbin/services_mkdb usr.sbin/pwd_mkdb # sysctl/chflags are required for installkernel: _basic_bootstrap_tools+=sbin/sysctl bin/chflags # mkfifo is used by sys/conf/newvers.sh _basic_bootstrap_tools+=usr.bin/mkfifo .if ${MK_AMD} != "no" # unifdef is only used by usr.sbin/amd/libamu/Makefile _basic_bootstrap_tools+=usr.bin/unifdef .endif .if ${MK_BOOT} != "no" _basic_bootstrap_tools+=bin/dd # xz/unxz is used by EFI _basic_bootstrap_tools_multilink+=usr.bin/xz xz,unxz # md5 is used by boot/beri (and possibly others) _basic_bootstrap_tools+=sbin/md5 .if defined(BOOTSTRAP_ALL_TOOLS) ${_bt}-sbin/md5: ${_bt}-lib/libmd .endif .endif .if ${MK_ZONEINFO} != "no" _basic_bootstrap_tools+=usr.sbin/zic usr.sbin/tzsetup .endif .if defined(BOOTSTRAP_ALL_TOOLS) _other_bootstrap_tools+=${_basic_bootstrap_tools} .for _subdir _links in ${_basic_bootstrap_tools_multilink} _other_bootstrap_tools+=${_subdir} .endfor ${_bt}-usr.bin/awk: ${_bt_lex_depend} ${_bt}-usr.bin/yacc ${_bt}-bin/expr: ${_bt_lex_depend} ${_bt}-usr.bin/yacc # If we are bootstrapping file2c, we have to build it before config: ${_bt}-usr.sbin/config: ${_bt}-usr.bin/file2c ${_bt_lex_depend} # Note: no symlink to make/bmake in the !BOOTSTRAP_ALL_TOOLS case here since # the links to make/bmake make links will have already have been created in the # `make legacy` step. Not adding a link to make is important on non-FreeBSD # since "make" will usually point to GNU make there. 
_other_bootstrap_tools+=usr.bin/bmake .else # All tools in _basic_bootstrap_tools have the same name as the subdirectory # so we can use :T to get the name of the symlinks that we need to create. _bootstrap_tools_links+=${_basic_bootstrap_tools:T} .for _subdir _links in ${_basic_bootstrap_tools_multilink} _bootstrap_tools_links+=${_links:S/,/ /g} .endfor .endif # defined(BOOTSTRAP_ALL_TOOLS) # Link the tools that we need for building but don't need to bootstrap because # the host version is known to be compatible into ${WORLDTMP}/legacy # We do this before building any of the bootstrap tools in case they depend on # the presence of any of the links (e.g. as m4/lex/awk) ${_bt}-links: .PHONY .for _tool in ${_bootstrap_tools_links} ${_bt}-link-${_tool}: .PHONY .MAKE @if [ ! -e "${WORLDTMP}/legacy/bin/${_tool}" ]; then \ source_path=`which ${_tool}`; \ if [ ! -e "$${source_path}" ] ; then \ echo "Cannot find host tool '${_tool}'"; false; \ fi; \ ln -sfnv "$${source_path}" "${WORLDTMP}/legacy/bin/${_tool}"; \ fi ${_bt}-links: ${_bt}-link-${_tool} .endfor bootstrap-tools: ${_bt}-links .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build. .for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_dtc} \ ${_cat} \ ${_kbdcontrol} \ ${_elftoolchain_libs} \ usr.bin/lorder \ lib/libopenbsd \ usr.bin/mandoc \ usr.bin/rpcgen \ ${_yacc} \ ${_m4} \ ${_lex} \ ${_other_bootstrap_tools} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunchide} \ ${_crunchgen} \ ${_nmtree} \ ${_vtfontcvt} \ ${_localedef} ${_bt}-${_tool}: ${_bt}-links .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ if [ "${_tool}" = "usr.bin/lex" ]; then \ ${MAKE} DIRPRFX=${_tool}/ bootstrap; \ fi; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${WORLDTMP}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) && ${MK_SYSCONS} != "no" _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .if ${MK_TCSH} != "no" _tcsh=bin/csh .endif .if ${MK_FILE} != "no" _libmagic=lib/libmagic .endif .if ${MK_PMC} != "no" && \ (${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386") _jevents=lib/libpmc/pmu-events .endif # kernel-toolchain skips _cleanobj, so handle cleaning up previous # build-tools directories if needed. 
.if !defined(NO_CLEAN) && make(kernel-toolchain) _bt_clean= ${CLEANDIR} .endif .for _tool in \ ${_tcsh} \ bin/sh \ ${LOCAL_TOOL_DIRS} \ ${_jevents} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ ${_libmagic} \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (${_bt_clean:D${_bt_clean},}obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ if [ -n "${_bt_clean}" ]; then ${MAKE} DIRPRFX=${_tool}/ ${_bt_clean}; fi; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: .PHONY mkdir -p ${WORLDTMP}/usr ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${BUILD_WITH_STRICT_TMPPATH} != 0 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ lib/libpe \ usr.bin/objcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). _elftctools= lib/libelftc \ lib/libpe \ usr.bin/objcopy .endif .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang .endif .if ${MK_LLD_BOOTSTRAP} != "no" _lld= usr.bin/clang/lld .endif .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_LLD_BOOTSTRAP} != "no" _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _gcc= gnu/usr.bin/cc .endif .if ${MK_USB} != "no" _usb_tools= stand/usb/tools .endif .if ${BUILD_WITH_STRICT_TMPPATH} != 0 || defined(BOOTSTRAP_ALL_TOOLS) _ar=usr.bin/ar .endif cross-tools: .MAKE .PHONY .for _tool in \ ${LOCAL_XTOOL_DIRS} \ ${_ar} \ ${_clang_libs} \ ${_clang} \ ${_lld} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_gcc} \ ${_btxld} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${WORLDTMP} install .endfor # # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # This target merely builds a toolchan/sysroot, then builds the tools it wants # with the options it wants in a special MAKEOBJDIRPREFIX, using the toolchain # already built. It then installs the static tools to NXBDESTDIR for Poudriere # to pickup. 
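# A typical invocation might look like (illustrative; the target pair and jail
# path are examples only):
#   make native-xtools NXB_TARGET=arm NXB_TARGET_ARCH=armv7
#   make native-xtools-install DESTDIR=/usr/local/poudriere/jails/armv7
# which leaves the static tools under DESTDIR/nxb-bin (the NXTP default).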
# NXBOBJROOT= ${OBJROOT}${MACHINE}.${MACHINE_ARCH}/nxb/ NXBOBJTOP= ${NXBOBJROOT}${NXB_TARGET}.${NXB_TARGET_ARCH} NXTP?= /nxb-bin .if ${NXTP:N/*} .error NXTP variable should be an absolute path .endif NXBDESTDIR?= ${DESTDIR}${NXTP} # This is the list of tools to be built/installed as static and where # appropriate to build for the given TARGET.TARGET_ARCH. NXBDIRS+= \ bin/cat \ bin/chmod \ bin/cp \ ${_tcsh} \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ sbin/md5 \ sbin/sysctl \ usr.bin/addr2line \ usr.bin/ar \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/diff \ usr.bin/dirname \ usr.bin/objcopy \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/head \ usr.bin/id \ usr.bin/lex \ usr.bin/limits \ usr.bin/lorder \ usr.bin/mandoc \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/nm \ usr.bin/patch \ usr.bin/readelf \ usr.bin/sed \ usr.bin/size \ usr.bin/sort \ usr.bin/strings \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/wc \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown SUBDIR_DEPEND_usr.bin/clang= lib/clang .if ${MK_CLANG} != "no" NXBDIRS+= lib/clang NXBDIRS+= usr.bin/clang .endif .if ${MK_GCC} != "no" NXBDIRS+= gnu/usr.bin/cc .endif .if ${MK_BINUTILS} != "no" NXBDIRS+= gnu/usr.bin/binutils .endif # XXX: native-xtools passes along ${NXBDIRS} in SUBDIR_OVERRIDE that needs # to be evaluated after NXBDIRS is set. .if make(install) && !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .endif NXBMAKEARGS+= \ OBJTOP=${NXBOBJTOP:Q} \ OBJROOT=${NXBOBJROOT:Q} \ MAKEOBJDIRPREFIX= \ -DNO_SHARED \ -DNO_CPU_CFLAGS \ -DNO_PIC \ SSP_CFLAGS= \ MK_CASPER=no \ MK_CLANG_EXTRAS=no \ MK_CLANG_FULL=no \ MK_CTF=no \ MK_DEBUG_FILES=no \ MK_GDB=no \ MK_HTML=no \ MK_LLDB=no \ MK_MAN=no \ MK_MAN_UTILS=yes \ MK_OFED=no \ MK_OPENSSH=no \ MK_PROFILE=no \ MK_RETPOLINE=no \ MK_SENDMAIL=no \ MK_SVNLITE=no \ MK_TESTS=no \ MK_WARNS=no \ MK_ZFS=no .if make(native-xtools*) && \ (!defined(NXB_TARGET) || !defined(NXB_TARGET_ARCH)) .error Missing NXB_TARGET / NXB_TARGET_ARCH .endif # For 'toolchain' we want to produce native binaries that themselves generate # native binaries. NXBTMAKE= ${NXBMAKEENV} ${MAKE} ${NXBMAKEARGS:N-DNO_PIC:N-DNO_SHARED} \ TARGET=${MACHINE} TARGET_ARCH=${MACHINE_ARCH} # For 'everything' we want to produce native binaries (hence -target to # be MACHINE) that themselves generate TARGET.TARGET_ARCH binaries. # TARGET/TARGET_ARCH are still passed along from user. # # Use the toolchain we create as an external toolchain. .if ${USING_SYSTEM_COMPILER} == "yes" || ${XCC:N${CCACHE_BIN}:M/*} NXBMAKE+= XCC="${XCC}" \ XCXX="${XCXX}" \ XCPP="${XCPP}" .else NXBMAKE+= XCC="${NXBOBJTOP}/tmp/usr/bin/cc" \ XCXX="${NXBOBJTOP}/tmp/usr/bin/c++" \ XCPP="${NXBOBJTOP}/tmp/usr/bin/cpp" .endif NXBMAKE+= ${NXBMAKEENV} ${MAKE} -f Makefile.inc1 ${NXBMAKEARGS} \ TARGET=${NXB_TARGET} TARGET_ARCH=${NXB_TARGET_ARCH} \ TARGET_TRIPLE=${MACHINE_TRIPLE:Q} # NXBDIRS is improperly based on MACHINE rather than NXB_TARGET. Need to # invoke a sub-make to reevaluate MK_GCC, etc, for NXBDIRS. NXBMAKE+= SUBDIR_OVERRIDE='$${NXBDIRS:M*}' # Need to avoid the -isystem logic when using clang as an external toolchain # even if the TARGET being built for wants GCC. 
NXBMAKE+= WANT_COMPILER_TYPE='$${X_COMPILER_TYPE}' native-xtools: .PHONY ${_+_}cd ${.CURDIR}; ${NXBTMAKE} _cleanobj MK_GCC=yes # Build the bootstrap/host/cross tools that produce native binaries # Pass along MK_GCC=yes to ensure GCC-needed build tools are built. # We don't quite know what the NXB_TARGET wants so just build it. ${_+_}cd ${.CURDIR}; ${NXBTMAKE} kernel-toolchain MK_GCC=yes # Populate includes/libraries sysroot that produce native binaries. # This is split out from 'toolchain' above mostly so that target LLVM # libraries have a proper LLVM_DEFAULT_TARGET_TRIPLE without # polluting the cross-compiler build. The LLVM/GCC libs are skipped # here to avoid the problem but are kept in 'toolchain' so that # needed build tools are built. ${_+_}cd ${.CURDIR}; ${NXBTMAKE} _includes MK_CLANG=no MK_GCC=no ${_+_}cd ${.CURDIR}; ${NXBTMAKE} _libraries MK_CLANG=no MK_GCC=no # Clean out improper TARGET=MACHINE files ${_+_}cd ${.CURDIR}/gnu/usr.bin/cc/cc_tools; ${NXBTMAKE} cleandir .if !defined(NO_OBJWALK) ${_+_}cd ${.CURDIR}; ${NXBMAKE} _obj .endif ${_+_}cd ${.CURDIR}; ${NXBMAKE} everything @echo ">> native-xtools done. Use 'make native-xtools-install' to install to a given DESTDIR" native-xtools-install: .PHONY mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null ${_+_}cd ${.CURDIR}; ${NXBMAKE} \ DESTDIR=${NXBDESTDIR} \ -DNO_ROOT \ install # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. # # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= lib/libcompiler_rt .if ${MK_SSP} != "no" _prereq_libs+= gnu/lib/libssp/libssp_nonshared .endif # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. 
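# (Illustrative note) every directory listed in _prereq_libs, _startup_libs,
# _prebuild_libs and _generic_libs gets a corresponding __PL/__L target further
# down; an extra hand-written dependency such as
#   lib/libfoo__L: lib/libbar__L
# (hypothetical library names) forces libbar to be built and installed into
# WORLDTMP before libfoo is built.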
# _startup_libs= lib/csu .if ${MK_BSD_CRTBEGIN} == "no" _startup_libs+= gnu/lib/csu .endif _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif .if ${MK_LLVM_LIBUNWIND} != "no" _prereq_libs+= lib/libgcc_eh lib/libgcc_s _startup_libs+= lib/libgcc_eh lib/libgcc_s lib/libgcc_s__L: lib/libc__L lib/libgcc_s__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: lib/libgcc_s__L .endif .else # MK_LLVM_LIBUNWIND == no _prereq_libs+= gnu/lib/libgcc _startup_libs+= gnu/lib/libgcc gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ + lib/libzstd \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} ${_cddl_lib_libzfs} \ ${_cddl_lib_libctf} \ lib/libufs \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_secure_lib_libssl} \ ${_lib_libldns} ${_secure_lib_libssh} .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_DIALOG} != "no" _prebuild_libs+= gnu/lib/libdialog gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _prebuild_libs+= \ lib/ofed/libibverbs \ lib/ofed/libibmad \ lib/ofed/libibumad \ lib/ofed/complib \ lib/ofed/libmlx5 lib/ofed/libibmad__L: lib/ofed/libibumad__L lib/ofed/complib__L: lib/libthr__L lib/ofed/libmlx5__L: lib/ofed/libibverbs__L lib/libthr__L .endif .if ${MK_CASPER} != "no" _lib_casper= lib/libcasper .endif lib/libpjdlog__L: lib/libutil__L lib/libcasper__L: lib/libnv__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib .if ${MK_IPFILTER} != "no" _generic_libs+= sbin/ipf/libipf .endif .for _DIR in ${LOCAL_LIB_DIRS} .if ${_DIR} == ".WAIT" || (empty(_generic_libs:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile)) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil .if ${MK_ZFS} != "no" _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libzfs= cddl/lib/libzfs cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: cddl/lib/libzfs_core__L lib/msun__L lib/libutil__L cddl/lib/libzfs__L: lib/libthr__L lib/libmd__L lib/libz__L cddl/lib/libumem__L 
cddl/lib/libzfs__L: cddl/lib/libuutil__L cddl/lib/libavl__L lib/libgeom__L lib/libbe__L: cddl/lib/libzfs__L .endif _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" _prebuild_libs+= lib/libprocstat lib/libproc lib/librtld_db lib/libprocstat__L: lib/libelf__L lib/libkvm__L lib/libutil__L lib/libproc__L: lib/libprocstat__L lib/librtld_db__L: lib/libprocstat__L .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L secure/lib/libcrypto__L: lib/libthr__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libssl__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_GSSAPI} != "no" && ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif .for _lib in 
${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if !defined(_MKSHOWCONFIG) && exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; fi; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if !defined(_MKSHOWCONFIG) && exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; fi; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: .PHONY @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | sort | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d 2>/dev/null | sort | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! 
-e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: .PHONY @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done | sort # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d 2>/dev/null | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done | sort delete-old-libs: .PHONY @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | sort | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: .PHONY @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done | sort delete-old-dirs: .PHONY @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}${DEBUGDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: .PHONY @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir}"; \ elif [ -L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs .PHONY @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." 
check-old: check-old-files check-old-libs check-old-dirs .PHONY @echo "To remove old files and directories run '${MAKE_CMD} delete-old'." @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." .endif # # showconfig - show build configuration. # showconfig: .PHONY @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1 UPDATE_DEPENDFILE=no NO_OBJ=yes; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1 UPDATE_DEPENDFILE=no NO_OBJ=yes) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if !defined(_MKSHOWCONFIG) && exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: .PHONY @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. We don't do a cleandir walk if MK_AUTO_OBJ is yes # since it is not possible for files to land in the wrong place. # .if make(cleanworld) BW_CANONICALOBJDIR:=${OBJTOP}/ .elif make(cleanuniverse) BW_CANONICALOBJDIR:=${OBJROOT} .if ${MK_UNIFIED_OBJDIR} == "no" .error ${.TARGETS} only supported with WITH_UNIFIED_OBJDIR enabled. 
.endif .endif cleanworld cleanuniverse: .PHONY .if !empty(BW_CANONICALOBJDIR) && exists(${BW_CANONICALOBJDIR}) && \ ${.CURDIR:tA} != ${BW_CANONICALOBJDIR:tA} -rm -rf ${BW_CANONICALOBJDIR}* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}* .endif .if make(cleanworld) && ${MK_AUTO_OBJ} == "no" && \ (empty(BW_CANONICALOBJDIR) || ${.CURDIR:tA} == ${BW_CANONICALOBJDIR:tA}) .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .endif .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBOBJROOT= ${OBJROOT}${MACHINE}.${MACHINE_ARCH}/xdev/ CDBOBJTOP= ${CDBOBJROOT}${XDDIR} CDBENV= \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CDMAKEARGS= \ OBJTOP=${CDBOBJTOP:Q} \ OBJROOT=${CDBOBJROOT:Q} CD2MAKEARGS= ${CDMAKEARGS} .if ${WANT_COMPILER_TYPE} == gcc || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) # GCC requires -isystem and -L when using a cross-compiler. --sysroot # won't set header path and -L is used to ensure the base library path # is added before the port PREFIX library path. CD2CFLAGS+= -isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib # GCC requires -B to find /usr/lib/crti.o when using a cross-compiler # combined with --sysroot. CD2CFLAGS+= -B${XDDESTDIR}/usr/lib # Force using libc++ for external GCC. .if defined(X_COMPILER_TYPE) && \ ${X_COMPILER_TYPE} == gcc && ${X_COMPILER_VERSION} >= 40800 CD2CXXFLAGS+= -isystem ${XDDESTDIR}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ .endif .endif CD2CFLAGS+= --sysroot=${XDDESTDIR}/ CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CXXFLAGS} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${OBJTOP}/${XDDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${CDMAKEARGS} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} \ ${MAKE} ${CD2MAKEARGS} ${NOFUN} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. 
CD2MAKE+= BUILD_TOOLS_META=.NOMETA .endif XDDESTDIR=${DESTDIR}${XDTP} .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .PHONY .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools .PHONY _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ ${_clang_tblgen} \ ${_gperf} \ ${_yacc} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${CDMAKE} DIRPRFX=${_tool}/ obj; fi; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} ${CDMAKEARGS} -f Makefile.inc1 ${NOFUN} build-tools XDEVDIRS= \ ${_clang_libs} \ ${_lld} \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang} \ ${_gcc} _xb-cross-tools: .PHONY .for _tool in ${XDEVDIRS} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${CDMAKE} DIRPRFX=${_tool}/ obj; fi; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if defined(LIBCOMPAT) ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries .PHONY _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in ${XDEVDIRS} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY .if !defined(NO_OBJWALK) ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 _obj \ DESTDIR=${XDDESTDIR} .endif ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${_REVISION}-$$i; \ done Index: projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs/zfs.8 =================================================================== --- projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs/zfs.8 (revision 352536) +++ projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs/zfs.8 (revision 352537) @@ -1,3918 +1,3937 @@ '\" te .\" Copyright (c) 2013, Martin Matuska . .\" All Rights Reserved. .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. 
.\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. .\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2011, 2014 by Delphix. All rights reserved. .\" Copyright (c) 2011, Pawel Jakub Dawidek .\" Copyright (c) 2012, Glen Barber .\" Copyright (c) 2012, Bryan Drewery .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. .\" Copyright (c) 2014, Joyent, Inc. All rights reserved. .\" Copyright (c) 2013, Steven Hartland .\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved. .\" Copyright (c) 2014, Xin LI .\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved. .\" Copyright 2018 Joyent, Inc. .\" Copyright (c) 2018 Datto Inc. .\" .\" $FreeBSD$ .\" .Dd February 15, 2018 .Dt ZFS 8 .Os .Sh NAME .Nm zfs .Nd configures ZFS file systems .Sh SYNOPSIS .Nm .Op Fl \&? .Nm .Cm create .Op Fl pu .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... Ar filesystem .Nm .Cm create .Op Fl ps .Op Fl b Ar blocksize .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Fl V .Ar size volume .Nm .Cm destroy .Op Fl fnpRrv .Ar filesystem Ns | Ns Ar volume .Nm .Cm destroy .Op Fl dnpRrv .Sm off .Ar filesystem Ns | Ns volume .Ns @snap .Op % Ns Ar snap .Op , Ns Ar snap Op % Ns Ar snap .Op , Ns ... .Sm on .Nm .Cm destroy .Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark .Nm .Cm snapshot Ns | Ns Cm snap .Op Fl r .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Ar filesystem@snapname Ns | Ns Ar volume@snapname .Ar filesystem@snapname Ns | Ns Ar volume@snapname Ns ... .Nm .Cm rollback .Op Fl rRf .Ar snapshot .Nm .Cm clone .Op Fl p .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Ar snapshot filesystem Ns | Ns Ar volume .Nm .Cm promote .Ar clone-filesystem .Nm .Cm rename .Op Fl f .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm .Cm rename .Op Fl f .Fl p .Ar filesystem Ns | Ns Ar volume .Ar filesystem Ns | Ns Ar volume .Nm .Cm rename .Fl r .Ar snapshot snapshot .Nm .Cm rename .Fl u .Op Fl p .Ar filesystem filesystem .Nm .Cm list .Op Fl r Ns | Ns Fl d Ar depth .Op Fl Hp .Op Fl o Ar property Ns Oo , Ns property Ns Oc Ns ... .Op Fl t Ar type Ns Oo , Ns type Ns Oc Ns ... .Oo Fl s Ar property Oc Ns ... .Oo Fl S Ar property Oc Ns ... .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot | Ns Ar bookmark Ns ... .Nm .Cm remap .Ar filesystem Ns | Ns Ar volume .Nm .Cm set .Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns ... .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... .Nm .Cm get .Op Fl r Ns | Ns Fl d Ar depth .Op Fl Hp .Op Fl o Ar all | field Ns Oo , Ns Ar field Oc Ns ... .Op Fl t Ar type Ns Oo Ns , Ar type Oc Ns ... .Op Fl s Ar source Ns Oo Ns , Ns Ar source Oc Ns ... .Ar all | property Ns Oo Ns , Ns Ar property Oc Ns ... .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... .Nm .Cm inherit .Op Fl rS .Ar property .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... 
.Nm .Cm upgrade .Op Fl v .Nm .Cm upgrade .Op Fl r .Op Fl V Ar version .Fl a | Ar filesystem .Nm .Cm userspace .Op Fl Hinp .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... .Oo Fl s Ar field Oc Ns ... .Oo Fl S Ar field Oc Ns ... .Op Fl t Ar type Ns Oo Ns , Ns Ar type Oc Ns ... .Ar filesystem Ns | Ns Ar snapshot .Nm .Cm groupspace .Op Fl Hinp .Op Fl o Ar field Ns Oo , Ns field Oc Ns ... .Oo Fl s Ar field Oc Ns ... .Oo Fl S Ar field Oc Ns ... .Op Fl t Ar type Ns Oo Ns , Ns Ar type Oc Ns ... .Ar filesystem Ns | Ns Ar snapshot .Nm .Cm mount .Nm .Cm mount .Op Fl vO .Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... .Fl a | Ar filesystem .Nm .Cm unmount Ns | Ns Cm umount .Op Fl f .Fl a | Ar filesystem Ns | Ns Ar mountpoint .Nm .Cm share .Fl a | Ar filesystem .Nm .Cm unshare .Fl a | Ar filesystem Ns | Ns Ar mountpoint .Nm .Cm bookmark .Ar snapshot .Ar bookmark .Nm .Cm send .Op Fl DLPRVcenpv .Op Fl i Ar snapshot | Fl I Ar snapshot .Ar snapshot .Nm .Cm send -.Op Fl Lce -.Op Fl i Ar snapshot Ns | Ns bookmark +.Op Fl LPcenv +.Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm .Cm send .Op Fl PVenv .Fl t Ar receive_resume_token .Nm .Cm receive Ns | Ns Cm recv .Op Fl vnsFu .Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm .Cm receive Ns | Ns Cm recv .Op Fl vnsFu .Op Fl d | e .Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem .Nm .Cm receive Ns | Ns Cm recv .Fl A .Ar filesystem Ns | Ns Ar volume .Nm .Cm allow .Ar filesystem Ns | Ns Ar volume .Nm .Cm allow .Op Fl ldug .Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... .Ar perm Ns | Ns Ar @setname Ns .Oo Ns , Ns Ar perm Ns | Ns Ar @setname Oc Ns ... .Ar filesystem Ns | Ns Ar volume .Nm .Cm allow .Op Fl ld .Fl e Ns | Ns Cm everyone .Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... .Ar filesystem Ns | Ns Ar volume .Nm .Cm allow .Fl c .Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... .Ar filesystem Ns | Ns Ar volume .Nm .Cm allow .Fl s .Ar @setname .Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... .Ar filesystem Ns | Ns Ar volume .Nm .Cm unallow .Op Fl rldug .Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Nm .Cm unallow .Op Fl rld .Fl e Ns | Ns Cm everyone .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Nm .Cm unallow .Op Fl r .Fl c .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Nm .Cm unallow .Op Fl r .Fl s .Ar @setname .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Nm .Cm hold .Op Fl r .Ar tag snapshot Ns ... .Nm .Cm holds .Op Fl Hp .Op Fl r Ns | Ns Fl d Ar depth .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns .Ns ... .Nm .Cm release .Op Fl r .Ar tag snapshot Ns ... .Nm .Cm diff .Op Fl FHt .Ar snapshot .Op Ar snapshot Ns | Ns Ar filesystem .Nm .Cm program .Op Fl jn .Op Fl t Ar timeout .Op Fl m Ar memory_limit .Ar pool script .Op Ar arg1 No ... .Nm .Cm jail .Ar jailid Ns | Ns Ar jailname filesystem .Nm .Cm unjail .Ar jailid Ns | Ns Ar jailname filesystem .Sh DESCRIPTION The .Nm command configures .Tn ZFS datasets within a .Tn ZFS storage pool, as described in .Xr zpool 8 . 
A dataset is identified by a unique path within the .Tn ZFS namespace. For example: .Bd -ragged -offset 4n .No pool/ Ns Brq filesystem,volume,snapshot .Ed .Pp where the maximum length of a dataset name is .Dv MAXNAMELEN (256 bytes) and the maximum amount of nesting allowed in a path is 50 levels deep. .Pp A dataset can be one of the following: .Bl -hang -width 12n .It Sy file system A .Tn ZFS dataset of type .Em filesystem can be mounted within the standard system namespace and behaves like other file systems. While .Tn ZFS file systems are designed to be .Tn POSIX compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space. .It Sy volume A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. .It Sy snapshot A read-only version of a file system or volume at a given point in time. It is specified as .Em filesystem@name or .Em volume@name . .El .Ss ZFS File System Hierarchy A .Tn ZFS storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the .Tn ZFS file system hierarchy. .Pp The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the .Xr zpool 8 command. .Pp See .Xr zpool 8 for more information on creating and administering pools. .Ss Snapshots A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more data than would otherwise be shared with the active dataset. .Pp Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently. .Pp File system snapshots can be accessed under the .Pa \&.zfs/snapshot directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the .Pa \&.zfs directory can be controlled by the .Sy snapdir property. .Ss Clones A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space. .Pp Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The .Sy origin property exposes this dependency, and the .Cm destroy command lists any such dependencies, if they exist. .Pp The clone parent-child dependency relationship can be reversed by using the .Cm promote subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from. .Ss Mount Points Creating a .Tn ZFS file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, .Tn ZFS automatically manages mounting and unmounting file systems without the need to edit the .Pa /etc/fstab file. 
All automatically managed file systems are mounted by .Tn ZFS at boot time. .Pp By default, file systems are mounted under .Pa /path , where .Ar path is the name of the file system in the .Tn ZFS namespace. Directories are created and destroyed as needed. .Pp A file system can also have a mount point set in the .Sy mountpoint property. This directory is created as needed, and .Tn ZFS automatically mounts the file system when the .Qq Nm Cm mount Fl a command is invoked (without editing .Pa /etc/fstab ) . The .Sy mountpoint property can be inherited, so if .Em pool/home has a mount point of .Pa /home , then .Em pool/home/user automatically inherits a mount point of .Pa /home/user . .Pp A file system .Sy mountpoint property of .Cm none prevents the file system from being mounted. .Pp If needed, .Tn ZFS file systems can also be managed with traditional tools .Pq Xr mount 8 , Xr umount 8 , Xr fstab 5 . If a file system's mount point is set to .Cm legacy , .Tn ZFS makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system. .Ss Jails .No A Tn ZFS dataset can be attached to a jail by using the .Qq Nm Cm jail subcommand. You cannot attach a dataset to one jail and the children of the same dataset to another jail. You can also not attach the root file system of the jail or any dataset which needs to be mounted before the zfs rc script is run inside the jail, as it would be attached unmounted until it is mounted from the rc script inside the jail. To allow management of the dataset from within a jail, the .Sy jailed property has to be set and the jail needs access to the .Pa /dev/zfs device. The .Sy quota property cannot be changed from within a jail. See .Xr jail 8 for information on how to allow mounting .Tn ZFS datasets from within a jail. .Pp .No A Tn ZFS dataset can be detached from a jail using the .Qq Nm Cm unjail subcommand. .Pp After a dataset is attached to a jail and the jailed property is set, a jailed file system cannot be mounted outside the jail, since the jail administrator might have set the mount point to an unacceptable value. .Ss Deduplication Deduplication is the process for removing redundant data at the block-level, reducing the total amount of data stored. If a file system has the .Cm dedup property enabled, duplicate data blocks are removed synchronously. The result is that only unique data is stored and common components are shared among files. .Ss Native Properties Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control .Tn ZFS behavior. In addition, native properties are either editable or read-only. User properties have no effect on .Tn ZFS behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the .Qq Sx User Properties section, below. .Pp Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots). .Pp The values of numeric properties can be specified using human-readable suffixes (for example, .Sy k , KB , M , Gb , and so forth, up to .Sy Z for zettabyte). 
The following are all valid (and equal) specifications: .Bd -ragged -offset 4n 1536M, 1.5g, 1.50GB .Ed .Pp The values of non-numeric properties are case sensitive and must be lowercase, except for .Sy mountpoint , sharenfs , No and Sy sharesmb . .Pp The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted. .Bl -tag -width 2n .It Sy available The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool. .Pp This property can also be referred to by its shortened column name, .Sy avail . .It Sy compressratio For non-snapshots, the compression ratio achieved for the .Sy used space of this dataset, expressed as a multiplier. The .Sy used property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the .Sy compressratio is the same as the .Sy refcompressratio property. Compression can be turned on by running: .Qq Nm Cm set compression=on Ar dataset The default value is .Cm off . .It Sy createtxg The transaction group (txg) in which the dataset was created. Bookmarks have the same .Sy createtxg as the snapshot they are initially tied to. This property is suitable for ordering a list of snapshots, e.g. for incremental send and receive. .It Sy creation The time this dataset was created. .It Sy clones For snapshots, this property is a comma-separated list of filesystems or volumes which are clones of this snapshot. The clones' .Sy origin property is this snapshot. If the .Sy clones property is not empty, then this snapshot can not be destroyed (even with the .Fl r or .Fl f options). .It Sy defer_destroy This property is .Cm on if the snapshot has been marked for deferred destroy by using the .Qq Nm Cm destroy -d command. Otherwise, the property is .Cm off . .It Sy filesystem_count The total number of filesystems and volumes that exist under this location in the dataset tree. This value is only available when a .Sy filesystem_limit has been set somewhere in the tree under which the dataset resides. .It Sy guid The 64 bit GUID of this dataset or bookmark which does not change over its entire lifetime. When a snapshot is sent to another pool, the received snapshot has the same GUID. Thus, the .Sy guid is suitable to identify a snapshot across pools. .It Sy logicalreferenced The amount of space that is .Qq logically accessible by this dataset. See the .Sy referenced property. The logical space ignores the effect of the .Sy compression and .Sy copies properties, giving a quantity closer to the amount of data that applications see. However, it does include space consumed by metadata. .Pp This property can also be referred to by its shortened column name, .Sy lrefer . .It Sy logicalused The amount of space that is .Qq logically consumed by this dataset and all its descendents. See the .Sy used property. The logical space ignores the effect of the .Sy compression and .Sy copies properties, giving a quantity closer to the amount of data that applications see. .Pp This property can also be referred to by its shortened column name, .Sy lused . .It Sy mounted For file systems, indicates whether the file system is currently mounted. 
This property can be either .Cm yes or .Cm no . .It Sy origin For cloned file systems or volumes, the snapshot from which the clone was created. See also the .Sy clones property. .It Sy receive_resume_token For filesystems or volumes which have saved partially-completed state from .Sy zfs receive -s , this opaque token can be provided to .Sy zfs send -t to resume and complete the .Sy zfs receive . .It Sy referenced The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical. .Pp This property can also be referred to by its shortened column name, .Sy refer . .It Sy refcompressratio The compression ratio achieved for the .Sy referenced space of this dataset, expressed as a multiplier. See also the .Sy compressratio property. .It Sy snapshot_count The total number of snapshots that exist under this location in the dataset tree. This value is only available when a .Sy snapshot_limit has been set somewhere in the tree under which the dataset resides. .It Sy type The type of dataset: .Sy filesystem , volume , No or Sy snapshot . .It Sy used The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets. The amount of space that a dataset consumes from its parent, as well as the amount of space that are freed if this dataset is recursively destroyed, is the greater of its space used and its reservation. .Pp When snapshots (see the .Qq Sx Snapshots section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots. .Pp The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using .Xr fsync 2 or .Sy O_SYNC does not necessarily guarantee that the space usage information is updated immediately. .It Sy usedby* The .Sy usedby* properties decompose the .Sy used properties into the various reasons that space is used. Specifically, .Sy used No = .Sy usedbysnapshots + usedbydataset + usedbychildren + usedbyrefreservation . These properties are only available for datasets created with .Tn ZFS pool version 13 pools and higher. .It Sy usedbysnapshots The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' .Sy used properties because space can be shared by multiple snapshots. .It Sy usedbydataset The amount of space used by this dataset itself, which would be freed if the dataset were destroyed (after first removing any .Sy refreservation and destroying any necessary snapshots or descendents). .It Sy usedbychildren The amount of space used by children of this dataset, which would be freed if all the dataset's children were destroyed. 
.It Sy usedbyrefreservation The amount of space used by a .Sy refreservation set on this dataset, which would be freed if the .Sy refreservation was removed. .It Sy userused@ Ns Ar user The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by .Qq Nm ls Fl l . The amount of space charged is displayed by .Qq Nm du and .Qq Nm ls Fl s . See the .Qq Nm Cm userspace subcommand for more information. .Pp Unprivileged users can access only their own space usage. The root user, or a user who has been granted the .Sy userused privilege with .Qq Nm Cm allow , can access everyone's usage. .Pp The .Sy userused@ Ns ... properties are not displayed by .Qq Nm Cm get all . The user's name must be appended after the .Sy @ symbol, using one of the following forms: .Bl -bullet -offset 2n .It POSIX name (for example, .Em joe ) .It POSIX numeric ID (for example, .Em 1001 ) .El .It Sy userrefs This property is set to the number of user holds on this snapshot. User holds are set by using the .Qq Nm Cm hold command. .It Sy groupused@ Ns Ar group The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by .Nm ls Fl l . See the .Sy userused@ Ns Ar user property for more information. .Pp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the .Sy groupused privilege with .Qq Nm Cm allow , can access all groups' usage. .It Sy volblocksize Ns = Ns Ar blocksize For volumes, specifies the block size of the volume. The .Ar blocksize cannot be changed once the volume has been written, so it should be set at volume creation time. The default .Ar blocksize for volumes is 8 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid. .Pp This property can also be referred to by its shortened column name, .Sy volblock . .It Sy written The amount of .Sy referenced space written to this dataset since the previous snapshot. .It Sy written@ Ns Ar snapshot The amount of .Sy referenced space written to this dataset since the specified snapshot. This is the space that is referenced by this dataset but was not referenced by the specified snapshot. .Pp The .Ar snapshot may be specified as a short snapshot name (just the part after the .Sy @ ) , in which case it will be interpreted as a snapshot in the same filesystem as this dataset. The .Ar snapshot may be a full snapshot name .Pq Em filesystem@snapshot , which for clones may be a snapshot in the origin's filesystem (or the origin of the origin's filesystem, etc). .El .Pp The following native properties can be used to change the behavior of a .Tn ZFS dataset. .Bl -tag -width 2n .It Xo .Sy aclinherit Ns = Ns Cm discard | .Cm noallow | .Cm restricted | .Cm passthrough | .Cm passthrough-x .Xc Controls how .Tn ACL entries are inherited when files and directories are created. A file system with an .Sy aclinherit property of .Cm discard does not inherit any .Tn ACL entries. A file system with an .Sy aclinherit property value of .Cm noallow only inherits inheritable .Tn ACL entries that specify "deny" permissions. The property value .Cm restricted (the default) removes the .Em write_acl and .Em write_owner permissions when the .Tn ACL entry is inherited. A file system with an .Sy aclinherit property value of .Cm passthrough inherits all inheritable .Tn ACL entries without any modifications made to the .Tn ACL entries when they are inherited. 
A file system with an .Sy aclinherit property value of .Cm passthrough-x has the same meaning as .Cm passthrough , except that the .Em owner@ , group@ , No and Em everyone@ Tn ACE Ns s inherit the execute permission only if the file creation mode also requests the execute bit. .Pp When the property value is set to .Cm passthrough , files are created with a mode determined by the inheritable .Tn ACE Ns s. If no inheritable .Tn ACE Ns s exist that affect the mode, then the mode is set in accordance to the requested mode from the application. .It Sy aclmode Ns = Ns Cm discard | groupmask | passthrough | restricted Controls how an .Tn ACL is modified during .Xr chmod 2 . A file system with an .Sy aclmode property of .Cm discard (the default) deletes all .Tn ACL entries that do not represent the mode of the file. An .Sy aclmode property of .Cm groupmask reduces permissions granted in all .Em ALLOW entries found in the .Tn ACL such that they are no greater than the group permissions specified by .Xr chmod 2 . A file system with an .Sy aclmode property of .Cm passthrough indicates that no changes are made to the .Tn ACL other than creating or updating the necessary .Tn ACL entries to represent the new mode of the file or directory. An .Sy aclmode property of .Cm restricted will cause the .Xr chmod 2 operation to return an error when used on any file or directory which has a non-trivial .Tn ACL whose entries can not be represented by a mode. .Xr chmod 2 is required to change the set user ID, set group ID, or sticky bits on a file or directory, as they do not have equivalent .Tn ACL entries. In order to use .Xr chmod 2 on a file or directory with a non-trivial .Tn ACL when .Sy aclmode is set to .Cm restricted , you must first remove all .Tn ACL entries which do not represent the current mode. .It Sy atime Ns = Ns Cm on | off Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is .Cm on . .It Sy canmount Ns = Ns Cm on | off | noauto If this property is set to .Cm off , the file system cannot be mounted, and is ignored by .Qq Nm Cm mount Fl a . Setting this property to .Cm off is similar to setting the .Sy mountpoint property to .Cm none , except that the dataset still has a normal .Sy mountpoint property, which can be inherited. Setting this property to .Cm off allows datasets to be used solely as a mechanism to inherit properties. One example of setting .Sy canmount Ns = Ns Cm off is to have two datasets with the same .Sy mountpoint , so that the children of both datasets appear in the same directory, but might have different inherited characteristics. .Pp When the .Cm noauto value is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the .Qq Nm Cm mount Fl a command or unmounted by the .Qq Nm Cm umount Fl a command. .Pp This property is not inherited. .It Sy checksum Ns = Ns Cm on | off | fletcher2 | fletcher4 | sha256 | noparity | sha512 | skein Controls the checksum used to verify data integrity. The default value is .Cm on , which automatically selects an appropriate algorithm (currently, .Cm fletcher4 , but this may change in future releases). The value .Cm off disables integrity checking on user data. 
The value .Cm noparity not only disables integrity but also disables maintaining parity for user data. This setting is used internally by a dump device residing on a RAID-Z pool and should not be used by any other dataset. Disabling checksums is .Em NOT a recommended practice. The .Sy sha512 and .Sy skein checksum algorithms require enabling the appropriate features on the pool. Please see .Xr zpool-features 7 for more information on these algorithms. .Pp Changing this property affects only newly-written data. .Pp Salted checksum algorithms .Pq Cm edonr , skein are currently not supported for any filesystem on the boot pools. .It Sy compression Ns = Ns Cm on | off | lzjb | gzip | gzip- Ns Ar N | Cm zle | Cm lz4 Controls the compression algorithm used for this dataset. Setting compression to .Cm on indicates that the current default compression algorithm should be used. The default balances compression and decompression speed with compression ratio, and is expected to work well on a wide variety of workloads. Unlike all other settings for this property, .Cm on does not select a fixed compression type. As new compression algorithms are added to ZFS and enabled on a pool, the default compression algorithm may change. The current default compression algorithm is either .Cm lzjb or, if the .Sy lz4_compress feature is enabled, .Cm lz4 . The .Cm lzjb compression algorithm is optimized for performance while providing decent data compression. Setting compression to .Cm on uses the .Cm lzjb compression algorithm. The .Cm gzip compression algorithm uses the same compression as the .Xr gzip 1 command. You can specify the .Cm gzip level by using the value .Cm gzip- Ns Ar N where .Ar N is an integer from 1 (fastest) to 9 (best compression ratio). Currently, .Cm gzip is equivalent to .Cm gzip-6 (which is also the default for .Xr gzip 1 ) . The .Cm zle compression algorithm compresses runs of zeros. .Pp The .Sy lz4 compression algorithm is a high-performance replacement for the .Sy lzjb algorithm. It features significantly faster compression and decompression, as well as a moderately higher compression ratio than .Sy lzjb , but can only be used on pools with the .Sy lz4_compress feature set to .Sy enabled . See .Xr zpool-features 7 for details on ZFS feature flags and the .Sy lz4_compress feature. .Pp This property can also be referred to by its shortened column name .Cm compress . Changing this property affects only newly-written data. .It Sy copies Ns = Ns Cm 1 | 2 | 3 Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the .Sy used property and counting against quotas and reservations. .Pp Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the .Fl o Cm copies= Ns Ar N option. .It Sy dedup Ns = Ns Cm on | off | verify | sha256 Ns Oo Cm ,verify Oc | Sy sha512 Ns Oo Cm ,verify Oc | Sy skein Ns Oo Cm ,verify Oc Configures deduplication for a dataset. The default value is .Cm off . The default deduplication checksum is .Cm sha256 (this may change in the future). When .Sy dedup is enabled, the checksum defined here overrides the .Sy checksum property. Setting the value to .Cm verify has the same effect as the setting .Cm sha256,verify .
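.Pp
As a brief illustration (the dataset name
.Em tank/data
below is only a placeholder), deduplication with explicit verification might be enabled with:
.Bd -literal -offset 4n
# zfs set dedup=sha512,verify tank/data
.Ed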
.Pp If set to .Cm verify , .Tn ZFS will do a byte-by-byte comparison when two blocks have the same signature, to make sure the block contents are identical. .It Sy devices Ns = Ns Cm on | off The .Sy devices property is currently not supported on .Fx . .It Sy exec Ns = Ns Cm on | off Controls whether processes can be executed from within this file system. The default value is .Cm on . .It Sy mlslabel Ns = Ns Ar label | Cm none The .Sy mlslabel property is currently not supported on .Fx . .It Sy filesystem_limit Ns = Ns Ar count | Cm none Limits the number of filesystems and volumes that can exist under this point in the dataset tree. The limit is not enforced if the user is allowed to change the limit. Setting a .Sy filesystem_limit on a descendent of a filesystem that already has a .Sy filesystem_limit does not override the ancestor's .Sy filesystem_limit , but rather imposes an additional limit. This feature must be enabled to be used .Po see .Xr zpool-features 7 .Pc . .It Sy mountpoint Ns = Ns Ar path | Cm none | legacy Controls the mount point used for this file system. See the .Qq Sx Mount Points section for more information on how this property is used. .Pp When the .Sy mountpoint property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is .Cm legacy , then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously .Cm legacy or .Cm none , or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location. .It Sy nbmand Ns = Ns Cm on | off The .Sy nbmand property is currently not supported on .Fx . .It Sy primarycache Ns = Ns Cm all | none | metadata Controls what is cached in the primary cache (ARC). If this property is set to .Cm all , then both user data and metadata is cached. If this property is set to .Cm none , then neither user data nor metadata is cached. If this property is set to .Cm metadata , then only metadata is cached. The default value is .Cm all . .It Sy quota Ns = Ns Ar size | Cm none Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit. .Pp Quotas cannot be set on volumes, as the .Sy volsize property acts as an implicit quota. .It Sy snapshot_limit Ns = Ns Ar count | Cm none Limits the number of snapshots that can be created on a dataset and its descendents. Setting a .Sy snapshot_limit on a descendent of a dataset that already has a .Sy snapshot_limit does not override the ancestor's .Sy snapshot_limit , but rather imposes an additional limit. The limit is not enforced if the user is allowed to change the limit. For example, this means that recursive snapshots taken from the global zone are counted against each delegated dataset within a jail. This feature must be enabled to be used .Po see .Xr zpool-features 7 .Pc . .It Sy userquota@ Ns Ar user Ns = Ns Ar size | Cm none Limits the amount of space consumed by the specified user. Similar to the .Sy refquota property, the .Sy userquota space calculation does not include space that is used by descendent datasets, such as snapshots and clones.
User space consumption is identified by the .Sy userspace@ Ns Ar user property. .Pp Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the .Em EDQUOT error message. See the .Cm userspace subcommand for more information. .Pp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the .Sy userquota privilege with .Qq Nm Cm allow , can get and set everyone's quota. .Pp This property is not available on volumes, on file systems before version 4, or on pools before version 15. The .Sy userquota@ Ns ... properties are not displayed by .Qq Nm Cm get all . The user's name must be appended after the .Sy @ symbol, using one of the following forms: .Bl -bullet -offset 2n .It POSIX name (for example, .Em joe ) .It POSIX numeric ID (for example, .Em 1001 ) .El .It Sy groupquota@ Ns Ar group Ns = Ns Ar size | Cm none Limits the amount of space consumed by the specified group. Group space consumption is identified by the .Sy userquota@ Ns Ar user property. .Pp Unprivileged users can access only their own groups' space usage. The root user, or a user who has been granted the .Sy groupquota privilege with .Qq Nm Cm allow , can get and set all groups' quotas. .It Sy readonly Ns = Ns Cm on | off Controls whether this dataset can be modified. The default value is .Cm off . .It Sy recordsize Ns = Ns Ar size Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. .Tn ZFS automatically tunes block sizes according to internal algorithms optimized for typical access patterns. .Pp For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a .Sy recordsize greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. .Pp The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes. If the .Sy large_blocks feature is enabled on the pool, the size may be up to 1 Mbyte. See .Xr zpool-features 7 for details on ZFS feature flags. .Pp Changing the file system's .Sy recordsize affects only files created afterward; existing files are unaffected. .Pp This property can also be referred to by its shortened column name, .Sy recsize . .It Sy redundant_metadata Ns = Ns Cm all | most Controls what types of metadata are stored redundantly. ZFS stores an extra copy of metadata, so that if a single block is corrupted, the amount of user data lost is limited. This extra copy is in addition to any redundancy provided at the pool level .Pq e.g. by mirroring or RAID-Z , and is in addition to an extra copy specified by the .Sy copies property .Pq up to a total of 3 copies . For example if the pool is mirrored, .Cm copies Ns = Ns Ar 2 , and .Cm redundant_metadata Ns = Ns Ar most , then ZFS stores 6 copies of most metadata, and 4 copies of data and some metadata. .Pp When set to .Cm all , ZFS stores an extra copy of all metadata. If a single on-disk block is corrupt, at worst a single block of user data .Po which is .Cm recordsize bytes long can be lost. .Pc .Pp When set to .Cm most , ZFS stores an extra copy of most types of metadata. 
This can improve performance of random writes, because less metadata must be written. In practice, at worst about 100 blocks .Po of .Cm recordsize bytes each .Pc of user data can be lost if a single on-disk block is corrupt. The exact behavior of which metadata blocks are stored redundantly may change in future releases. .Pp The default value is .Cm all . .It Sy refquota Ns = Ns Ar size | Cm none Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots. .It Sy refreservation Ns = Ns Ar size | Cm none | Cm auto The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by .Sy refreservation . The .Sy refreservation reservation is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations. .Pp If .Sy refreservation is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset. .Pp If .Sy refreservation is set to .Sy auto , a volume is thick provisioned or not sparse. .Sy refreservation Ns = Cm auto is only supported on volumes. See .Sy volsize in the Native Properties section for more information about sparse volumes. .Pp This property can also be referred to by its shortened column name, .Sy refreserv . .It Sy reservation Ns = Ns Ar size | Cm none The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations. .Pp This property can also be referred to by its shortened column name, .Sy reserv . .It Sy secondarycache Ns = Ns Cm all | none | metadata Controls what is cached in the secondary cache (L2ARC). If this property is set to .Cm all , then both user data and metadata is cached. If this property is set to .Cm none , then neither user data nor metadata is cached. If this property is set to .Cm metadata , then only metadata is cached. The default value is .Cm all . .It Sy setuid Ns = Ns Cm on | off Controls whether the .No set- Ns Tn UID bit is respected for the file system. The default value is .Cm on . .It Sy sharesmb Ns = Ns Cm on | off | Ar opts The .Sy sharesmb property currently has no effect on .Fx . .It Sy sharenfs Ns = Ns Cm on | off | Ar opts Controls whether the file system is shared via .Tn NFS , and what options are used. A file system with a .Sy sharenfs property of .Cm off is managed the traditional way via .Xr exports 5 . Otherwise, the file system is automatically shared and unshared with the .Qq Nm Cm share and .Qq Nm Cm unshare commands. If the property is set to .Cm on no .Tn NFS export options are used. Otherwise, .Tn NFS export options are equivalent to the contents of this property. The export options may be comma-separated. See .Xr exports 5 for a list of valid options. .Pp When the .Sy sharenfs property is changed for a dataset, the .Xr mountd 8 daemon is reloaded. .It Sy logbias Ns = Ns Cm latency | throughput Provide a hint to .Tn ZFS about handling of synchronous requests in this dataset. 
If .Sy logbias is set to .Cm latency (the default), .Tn ZFS will use pool log devices (if configured) to handle the requests at low latency. If .Sy logbias is set to .Cm throughput , .Tn ZFS will not use configured pool log devices. .Tn ZFS will instead optimize synchronous operations for global pool throughput and efficient use of resources. .It Sy snapdir Ns = Ns Cm hidden | visible Controls whether the .Pa \&.zfs directory is hidden or visible in the root of the file system as discussed in the .Qq Sx Snapshots section. The default value is .Cm hidden . .It Sy sync Ns = Ns Cm standard | always | disabled Controls the behavior of synchronous requests (e.g. .Xr fsync 2 , O_DSYNC). This property accepts the following values: .Bl -tag -offset 4n -width 8n .It Sy standard This is the POSIX specified behavior of ensuring all synchronous requests are written to stable storage and all devices are flushed to ensure data is not cached by device controllers (this is the default). .It Sy always All file system transactions are written and flushed before their system calls return. This has a large performance penalty. .It Sy disabled Disables synchronous requests. File system transactions are only committed to stable storage periodically. This option will give the highest performance. However, it is very dangerous as .Tn ZFS would be ignoring the synchronous transaction demands of applications such as databases or .Tn NFS . Administrators should only use this option when the risks are understood. .El .It Sy volsize Ns = Ns Ar size For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. For storage pools with a version number of 9 or higher, a .Sy refreservation is set instead. Any changes to .Sy volsize are reflected in an equivalent change to the reservation (or .Sy refreservation ) . The .Sy volsize can only be set to a multiple of .Cm volblocksize , and cannot be zero. .Pp The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size. .Pp Though not recommended, a "sparse volume" (also known as "thin provisioned") can be created by specifying the .Fl s option to the .Qq Nm Cm create Fl V command, or by changing the value of the .Sy refreservation property, or .Sy reservation property on a pool .Po version 8 or earlier .Pc after the volume has been created. A "sparse volume" is a volume where the value of .Sy refreservation is less than the size of the volume plus the space required to store its metadata. Consequently, writes to a sparse volume can fail with .Sy ENOSPC when the pool is low on space. For a sparse volume, changes to .Sy volsize are not reflected in the .Sy refreservation . A volume that is not sparse is said to be "thick provisioned". A sparse volume can become thick provisioned by setting .Sy refreservation to .Sy auto . .It Sy volmode Ns = Ns Cm default | geom | dev | none This property specifies how volumes should be exposed to the OS. Setting it to .Sy geom exposes volumes as .Xr geom 4 providers, providing maximal functionality. Setting it to .Sy dev exposes volumes only as a cdev device in devfs.
Such volumes can be accessed only as raw disk device files, i.e. they cannot be partitioned, mounted, or used in RAID configurations, etc., but they are faster, and in some use scenarios with untrusted consumers, such as NAS or VM storage, can be safer. Volumes with the property set to .Sy none are not exposed outside ZFS, but can be snapshotted, cloned, replicated, etc., which can be suitable for backup purposes. The value .Sy default means that volume exposure is controlled by the system-wide sysctl/tunable .Va vfs.zfs.vol.mode , where .Sy geom , .Sy dev and .Sy none are encoded as 1, 2 and 3 respectively. The default value is .Sy geom . This property can be changed at any time, but so far it is processed only during volume creation and pool import. .It Sy vscan Ns = Ns Cm off | on The .Sy vscan property is currently not supported on .Fx . .It Sy xattr Ns = Ns Cm off | on The .Sy xattr property is currently not supported on .Fx . .It Sy jailed Ns = Ns Cm off | on Controls whether the dataset is managed from a jail. See the .Qq Sx Jails section for more information. The default value is .Cm off . .El .Pp The following three properties cannot be changed after the file system is created, and therefore should be set when the file system is created. If the properties are not set with the .Qq Nm Cm create or .Nm zpool Cm create commands, these properties are inherited from the parent dataset. If the parent dataset lacks these properties due to having been created prior to these features being supported, the new file system will have the default values for these properties. .Bl -tag -width 4n .It Sy casesensitivity Ns = Ns Cm sensitive | insensitive | mixed Indicates whether the file name matching algorithm used by the file system should be case-sensitive, case-insensitive, or allow a combination of both styles of matching. The default value for the .Sy casesensitivity property is .Cm sensitive . Traditionally, UNIX and POSIX file systems have case-sensitive file names. .Pp The .Cm mixed value for the .Sy casesensitivity property indicates that the file system can support requests for both case-sensitive and case-insensitive matching behavior. .It Sy normalization Ns = Ns Cm none | formC | formD | formKC | formKD Indicates whether the file system should perform a .Sy unicode normalization of file names whenever two file names are compared, and which normalization algorithm should be used. File names are always stored unmodified; names are normalized as part of any comparison process. If this property is set to a legal value other than .Cm none , and the .Sy utf8only property was left unspecified, the .Sy utf8only property is automatically set to .Cm on . The default value of the .Sy normalization property is .Cm none . This property cannot be changed after the file system is created. .It Sy utf8only Ns = Ns Cm on | off Indicates whether the file system should reject file names that include characters that are not present in the .Sy UTF-8 character code set. If this property is explicitly set to .Cm off , the normalization property must either not be explicitly set or be set to .Cm none . The default value for the .Sy utf8only property is .Cm off . This property cannot be changed after the file system is created. .El .Pp The .Sy casesensitivity , normalization , No and Sy utf8only properties are also new permissions that can be assigned to non-privileged users by using the .Tn ZFS delegated administration feature.
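.Pp
As a brief illustration (the dataset name
.Em tank/share
below is only a placeholder), these creation-time properties would typically be supplied when the file system is created, for example:
.Bd -literal -offset 4n
# zfs create -o casesensitivity=mixed -o normalization=formD -o utf8only=on tank/share
.Ed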
.Ss Temporary Mount Point Properties When a file system is mounted, either through .Xr mount 8 for legacy mounts or the .Qq Nm Cm mount command for normal file systems, its mount options are set according to its properties. The correlation between properties and mount options is as follows: .Bl -column -offset 4n "PROPERTY" "MOUNT OPTION" .It "PROPERTY MOUNT OPTION" .It "atime atime/noatime" .It "exec exec/noexec" .It "readonly ro/rw" .It "setuid suid/nosuid" .El .Pp In addition, these options can be set on a per-mount basis using the .Fl o option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. These properties are reported as "temporary" by the .Qq Nm Cm get command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings. .Ss User Properties In addition to the standard native properties, .Tn ZFS supports arbitrary user properties. User properties have no effect on .Tn ZFS behavior, but applications or administrators can use them to annotate datasets (file systems, volumes, and snapshots). .Pp User property names must contain a colon .Pq Sy \&: character to distinguish them from native properties. They may contain lowercase letters, numbers, and the following punctuation characters: colon .Pq Sy \&: , dash .Pq Sy \&- , period .Pq Sy \&. and underscore .Pq Sy \&_ . The expected convention is that the property name is divided into two portions such as .Em module Ns Sy \&: Ns Em property , but this namespace is not enforced by .Tn ZFS . User property names can be at most 256 characters, and cannot begin with a dash .Pq Sy \&- . .Pp When making programmatic use of user properties, it is strongly suggested to use a reversed .Tn DNS domain name for the .Ar module component of property names to reduce the chance that two independently-developed packages use the same property name for different purposes. Property names beginning with .Em com.sun are reserved for use by Sun Microsystems. .Pp The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties .Po .Qq Nm Cm list , .Qq Nm Cm get , .Qq Nm Cm set and so forth .Pc can be used to manipulate both native properties and user properties. Use the .Qq Nm Cm inherit command to clear a user property. If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters. .Sh SUBCOMMANDS All subcommands that modify state are logged persistently to the pool in their original form. .Bl -tag -width 2n .It Xo .Nm .Op Fl \&? .Xc .Pp Displays a help message. .It Xo .Nm .Cm create .Op Fl pu .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Ar filesystem .Xc .Pp Creates a new .Tn ZFS file system. The file system is automatically mounted according to the .Sy mountpoint property inherited from the parent. .Bl -tag -width indent .It Fl p Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the .Sy mountpoint property inherited from their parent. Any property specified on the command line using the .Fl o option is ignored. If the target filesystem already exists, the operation completes successfully. .It Fl u Newly created file system is not mounted. 
.It Fl o Ar property Ns = Ns Ar value Sets the specified property as if the command .Qq Nm Cm set Ar property Ns = Ns Ar value was invoked at the same time the dataset was created. Any editable .Tn ZFS property can also be set at creation time. Multiple .Fl o options can be specified. An error results if the same property is specified in multiple .Fl o options. .El .It Xo .Nm .Cm create .Op Fl ps .Op Fl b Ar blocksize .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Fl V .Ar size volume .Xc .Pp Creates a volume of the given size. The volume is exported as a block device in .Pa /dev/zvol/path , where .Ar path is the name of the volume in the .Tn ZFS namespace. The size represents the logical size as exported by the device. By default, a reservation of equal size is created. .Pp .Ar size is automatically rounded up to the nearest 128 Kbytes to ensure that the volume has an integral number of blocks regardless of .Ar blocksize . .Bl -tag -width indent .It Fl p Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the .Sy mountpoint property inherited from their parent. Any property specified on the command line using the .Fl o option is ignored. If the target filesystem already exists, the operation completes successfully. .It Fl s Creates a sparse volume with no reservation. See .Sy volsize in the .Qq Sx Native Properties section for more information about sparse volumes. .It Fl b Ar blocksize Equivalent to .Fl o Cm volblocksize Ns = Ns Ar blocksize . If this option is specified in conjunction with .Fl o Cm volblocksize , the resulting behavior is undefined. .It Fl o Ar property Ns = Ns Ar value Sets the specified property as if the .Qq Nm Cm set Ar property Ns = Ns Ar value command was invoked at the same time the dataset was created. Any editable .Tn ZFS property can also be set at creation time. Multiple .Fl o options can be specified. An error results if the same property is specified in multiple .Fl o options. .El .It Xo .Nm .Cm destroy .Op Fl fnpRrv .Ar filesystem Ns | Ns Ar volume .Xc .Pp Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children or clones). .Bl -tag -width indent .It Fl r Recursively destroy all children. .It Fl R Recursively destroy all dependents, including cloned file systems outside the target hierarchy. .It Fl f Force an unmount of any file systems using the .Qq Nm Cm unmount Fl f command. This option has no effect on non-file systems or unmounted file systems. .It Fl n Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in conjunction with the .Fl v or .Fl p flags to determine what data would be deleted. .It Fl p Print machine-parsable verbose information about the deleted data. .It Fl v Print verbose information about the deleted data. .El .Pp Extreme care should be taken when applying either the .Fl r or the .Fl R options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. .It Xo .Nm .Cm destroy .Op Fl dnpRrv .Sm off .Ar snapshot .Op % Ns Ar snapname .Op , Ns ... .Sm on .Xc .Pp The given snapshots are destroyed immediately if and only if the .Qq Nm Cm destroy command without the .Fl d option would have destroyed it. Such immediate destruction would occur, for example, if the snapshot had no clones and the user-initiated reference count were zero. 
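.Pp
As a brief illustration (the snapshot name
.Em tank/home@tuesday
below is only a placeholder), such a snapshot would simply be removed right away:
.Bd -literal -offset 4n
# zfs destroy tank/home@tuesday
.Ed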
.Pp If a snapshot does not qualify for immediate destruction, it is marked for deferred deletion. In this state, it exists as a usable, visible snapshot until both of the preconditions listed above are met, at which point it is destroyed. .Pp An inclusive range of snapshots may be specified by separating the first and last snapshots with a percent sign .Pq Sy % . The first and/or last snapshots may be left blank, in which case the filesystem's oldest or newest snapshot will be implied. .Pp Multiple snapshots (or ranges of snapshots) of the same filesystem or volume may be specified in a comma-separated list of snapshots. Only the snapshot's short name (the part after the .Sy @ ) should be specified when using a range or comma-separated list to identify multiple snapshots. .Bl -tag -width indent .It Fl r Destroy (or mark for deferred deletion) all snapshots with this name in descendent file systems. .It Fl R Recursively destroy all clones of these snapshots, including the clones, snapshots, and children. If this flag is specified, the .Fl d flag will have no effect. .It Fl n Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in conjunction with the .Fl v or .Fl p flags to determine what data would be deleted. .It Fl p Print machine-parsable verbose information about the deleted data. .It Fl v Print verbose information about the deleted data. .It Fl d Defer snapshot deletion. .El .Pp Extreme care should be taken when applying either the .Fl r or the .Fl R options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. .It Xo .Nm .Cm destroy .Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark .Xc .Pp The given bookmark is destroyed. .It Xo .Nm .Cm snapshot Ns | Ns Cm snap .Op Fl r .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Ar filesystem@snapname Ns | Ns volume@snapname .Ar filesystem@snapname Ns | Ns volume@snapname Ns ... .Xc .Pp Creates snapshots with the given names. All previous modifications by successful system calls to the file system are part of the snapshots. Snapshots are taken atomically, so that all snapshots correspond to the same moment in time. See the .Qq Sx Snapshots section for details. .Bl -tag -width indent .It Fl r Recursively create snapshots of all descendent datasets .It Fl o Ar property Ns = Ns Ar value Sets the specified property; see .Qq Nm Cm create for details. .El .It Xo .Nm .Cm rollback .Op Fl rRf .Ar snapshot .Xc .Pp Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than the most recent one. In order to do so, all intermediate snapshots and bookmarks must be destroyed by specifying the .Fl r option. .Pp The .Fl rR options do not recursively destroy the child snapshots of a recursive snapshot. Only direct snapshots of the specified filesystem are destroyed by either of these options. To completely roll back a recursive snapshot, you must rollback the individual child snapshots. .Bl -tag -width indent .It Fl r Destroy any snapshots and bookmarks more recent than the one specified. .It Fl R Destroy any more recent snapshots and bookmarks, as well as any clones of those snapshots. .It Fl f Used with the .Fl R option to force an unmount of any clone file systems that are to be destroyed. 
.El .It Xo .Nm .Cm clone .Op Fl p .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Ar snapshot filesystem Ns | Ns Ar volume .Xc .Pp Creates a clone of the given snapshot. See the .Qq Sx Clones section for details. The target dataset can be located anywhere in the .Tn ZFS hierarchy, and is created as the same type as the original. .Bl -tag -width indent .It Fl p Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the .Sy mountpoint property inherited from their parent. If the target filesystem or volume already exists, the operation completes successfully. .It Fl o Ar property Ns = Ns Ar value Sets the specified property; see .Qq Nm Cm create for details. .El .It Xo .Nm .Cm promote .Ar clone-filesystem .Xc .Pp Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the origin file system becomes a clone of the specified file system. .Pp The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the origin file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The .Cm rename subcommand can be used to rename any conflicting snapshots. .It Xo .Nm .Cm rename .Op Fl f .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc .It Xo .Nm .Cm rename .Op Fl f .Fl p .Ar filesystem Ns | Ns Ar volume .Ar filesystem Ns | Ns Ar volume .Xc .It Xo .Nm .Cm rename .Fl u .Op Fl p .Ar filesystem filesystem .Xc .Pp Renames the given dataset. The new target can be located anywhere in the .Tn ZFS hierarchy, with the exception of snapshots. Snapshots can only be renamed within the parent file system or volume. When renaming a snapshot, the parent file system of the snapshot does not need to be specified as part of the second argument. Renamed file systems can inherit new mount points, in which case they are unmounted and remounted at the new mount point. .Bl -tag -width indent .It Fl p Creates all the nonexistent parent datasets. Datasets created in this manner are automatically mounted according to the .Sy mountpoint property inherited from their parent. .It Fl u Do not remount file systems during rename. If a file system's .Sy mountpoint property is set to .Cm legacy or .Cm none , the file system is not unmounted even if this option is not given. .It Fl f Force unmount any filesystems that need to be unmounted in the process. This flag has no effect if used together with the .Fl u flag. .El .It Xo .Nm .Cm rename .Fl r .Ar snapshot snapshot .Xc .Pp Recursively rename the snapshots of all descendent datasets. Snapshots are the only type of dataset that can be renamed recursively. .It Xo .Nm .Cm list .Op Fl r Ns | Ns Fl d Ar depth .Op Fl Hp .Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... .Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... .Oo Fl s Ar property Oc Ns ... .Oo Fl S Ar property Oc Ns ... .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... .Xc .Pp Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname.
By default, all file systems and volumes are displayed. Snapshots are displayed if the .Sy listsnaps property is .Cm on (the default is .Cm off ) . The following fields are displayed, .Sy name , used , available , referenced , mountpoint . .Bl -tag -width indent .It Fl r Recursively display any children of the dataset on the command line. .It Fl d Ar depth Recursively display any children of the dataset, limiting the recursion to .Ar depth . A depth of .Sy 1 will display only the dataset and its direct children. .It Fl H Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space. .It Fl p Display numbers in parsable (exact) values. .It Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... A comma-separated list of properties to display. The property must be: .Bl -bullet -offset 2n .It One of the properties described in the .Qq Sx Native Properties section .It A user property .It The value .Cm name to display the dataset name .It The value .Cm space to display space usage properties on file systems and volumes. This is a shortcut for specifying .Fl o .Sy name,avail,used,usedsnap,usedds,usedrefreserv,usedchild .Fl t .Sy filesystem,volume syntax. .El .It Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... A comma-separated list of types to display, where .Ar type is one of .Sy filesystem , snapshot , snap , volume , bookmark , No or Sy all . For example, specifying .Fl t Cm snapshot displays only snapshots. .It Fl s Ar property A property for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the .Qq Sx Properties section, or the special value .Cm name to sort by the dataset name. Multiple properties can be specified at one time using multiple .Fl s property options. Multiple .Fl s options are evaluated from left to right in decreasing order of importance. .Pp The following is a list of sorting criteria: .Bl -bullet -offset 2n .It Numeric types sort in numeric order. .It String types sort in alphabetical order. .It Types inappropriate for a row sort that row to the literal bottom, regardless of the specified ordering. .It If no sorting options are specified the existing behavior of .Qq Nm Cm list is preserved. .El .It Fl S Ar property Same as the .Fl s option, but sorts by property in descending order. .El .It Xo .Nm .Cm set .Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns ... .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc .Pp Sets the property or list of properties to the given value(s) for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of .Sy B , K , M , G , T , P , E , Z (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). User properties can be set on snapshots. For more information, see the .Qq Sx User Properties section. .It Xo .Nm .Cm get .Op Fl r Ns | Ns Fl d Ar depth .Op Fl Hp .Op Fl o Ar all | field Ns Oo , Ns Ar field Oc Ns ... .Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... .Op Fl s Ar source Ns Oo , Ns Ar source Oc Ns ... .Ar all | property Ns Oo , Ns Ar property Oc Ns ... .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns | Ns Ar bookmark Ns ... .Xc .Pp Displays properties for the given datasets. 
If no datasets are specified, then the command displays properties for all datasets on the system. For each property, the following columns are displayed: .Pp .Bl -hang -width "property" -offset indent -compact .It name Dataset name .It property Property name .It value Property value .It source Property source. Can be local, default, temporary, inherited, received, or none (\&-). .El .Pp All columns except the .Sy RECEIVED column are displayed by default. The columns to display can be specified by using the .Fl o option. This command takes a comma-separated list of properties as described in the .Qq Sx Native Properties and .Qq Sx User Properties sections. .Pp The special value .Cm all can be used to display all properties that apply to the given dataset's type (filesystem, volume, snapshot, or bookmark). .Bl -tag -width indent .It Fl r Recursively display properties for any children. .It Fl d Ar depth Recursively display any children of the dataset, limiting the recursion to .Ar depth . A depth of .Sy 1 will display only the dataset and its direct children. .It Fl H Display output in a form more easily parsed by scripts. Any headers are omitted, and fields are explicitly separated by a single tab instead of an arbitrary amount of space. .It Fl p Display numbers in parsable (exact) values. .It Fl o Cm all | Ar field Ns Oo , Ns Ar field Oc Ns ... A comma-separated list of columns to display. Supported values are .Sy name,property,value,received,source . Default values are .Sy name,property,value,source . The keyword .Cm all specifies all columns. .It Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... A comma-separated list of types to display, where .Ar type is one of .Sy filesystem , snapshot , volume , No or Sy all . For example, specifying .Fl t Cm snapshot displays only snapshots. .It Fl s Ar source Ns Oo , Ns Ar source Oc Ns ... A comma-separated list of sources to display. Those properties coming from a source other than those in this list are ignored. Each source must be one of the following: .Sy local,default,inherited,temporary,received,none . The default value is all sources. .El .It Xo .Nm .Cm inherit .Op Fl rS .Ar property .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... .Xc .Pp Clears the specified property, causing it to be inherited from an ancestor, restored to default if no ancestor has the property set, or with the .Fl S option reverted to the received value if one exists. See the .Qq Sx Properties section for a listing of default values, and details on which properties can be inherited. .Bl -tag -width indent .It Fl r Recursively inherit the given property for all children. .It Fl S Revert the property to the received value if one exists; otherwise operate as if the .Fl S option was not specified. .El .It Xo .Nm .Cm remap .Ar filesystem Ns | Ns Ar volume .Xc .Pp Remap the indirect blocks in the given filesystem or volume so that they no longer reference blocks on previously removed vdevs, allowing the size of the indirect mapping objects for those vdevs to eventually shrink. Note that remapping all blocks might not be possible and that references from snapshots will still exist and cannot be remapped. .It Xo .Nm .Cm upgrade .Op Fl v .Xc .Pp Displays a list of file systems that are not the most recent version. .Bl -tag -width indent .It Fl v Displays .Tn ZFS filesystem versions supported by the current software.
The current .Tn ZFS filesystem version and all previous supported versions are displayed, along with an explanation of the features provided with each version. .El .It Xo .Nm .Cm upgrade .Op Fl r .Op Fl V Ar version .Fl a | Ar filesystem .Xc .Pp Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. .Qq Nm Cm send streams generated from new snapshots of these file systems cannot be accessed on systems running older versions of the software. .Pp In general, the file system version is independent of the pool version. See .Xr zpool 8 for information on the .Nm zpool Cm upgrade command. .Pp In some cases, the file system version and the pool version are interrelated and the pool version must be upgraded before the file system version can be upgraded. .Bl -tag -width indent .It Fl r Upgrade the specified file system and all descendent file systems. .It Fl V Ar version Upgrade to the specified .Ar version . If the .Fl V flag is not specified, this command upgrades to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software. .It Fl a Upgrade all file systems on all imported pools. .It Ar filesystem Upgrade the specified file system. .El .It Xo .Nm .Cm userspace .Op Fl Hinp .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... .Oo Fl s Ar field Oc Ns ... .Oo Fl S Ar field Oc Ns ... .Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... .Ar filesystem Ns | Ns Ar snapshot .Xc .Pp Displays space consumed by, and quotas on, each user in the specified filesystem or snapshot. This corresponds to the .Sy userused@ Ns Ar user and .Sy userquota@ Ns Ar user properties. .Bl -tag -width indent .It Fl n Print numeric ID instead of user/group name. .It Fl H Do not print headers, use tab-delimited output. .It Fl p Use exact (parsable) numeric output. .It Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Display only the specified fields from the following set: .Sy type,name,used,quota . The default is to display all fields. .It Fl s Ar field Sort output by this field. The .Fl s and .Fl S flags may be specified multiple times to sort first by one field, then by another. The default is .Fl s Cm type Fl s Cm name . .It Fl S Ar field Sort by this field in reverse order. See .Fl s . .It Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Print only the specified types from the following set: .Sy all,posixuser,smbuser,posixgroup,smbgroup . .Pp The default is .Fl t Cm posixuser,smbuser . .Pp The default can be changed to include group types. .It Fl i Translate SID to POSIX ID. This flag currently has no effect on .Fx . .El .It Xo .Nm .Cm groupspace .Op Fl Hinp .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... .Oo Fl s Ar field Oc Ns ... .Oo Fl S Ar field Oc Ns ... .Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... .Ar filesystem Ns | Ns Ar snapshot .Xc .Pp Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to .Qq Nm Cm userspace , except that the default types to display are .Fl t Sy posixgroup,smbgroup . .It Xo .Nm .Cm mount .Xc .Pp Displays all .Tn ZFS file systems currently mounted. .Bl -tag -width indent .It Fl f .El .It Xo .Nm .Cm mount .Op Fl vO .Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... .Fl a | Ar filesystem .Xc .Pp Mounts .Tn ZFS file systems. .Bl -tag -width indent .It Fl v Report mount progress. .It Fl O Perform an overlay mount. 
Overlay mounts are not supported on .Fx . .It Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... An optional, comma-separated list of mount options to use temporarily for the duration of the mount. See the .Qq Sx Temporary Mount Point Properties section for details. .It Fl a Mount all available .Tn ZFS file systems. This command may be executed on .Fx system startup by .Pa /etc/rc.d/zfs . For more information, see variable .Va zfs_enable in .Xr rc.conf 5 . .It Ar filesystem Mount the specified filesystem. .El .It Xo .Nm .Cm unmount Ns | Ns Cm umount .Op Fl f .Fl a | Ar filesystem Ns | Ns Ar mountpoint .Xc .Pp Unmounts currently mounted .Tn ZFS file systems. .Bl -tag -width indent .It Fl f Forcefully unmount the file system, even if it is currently in use. .It Fl a Unmount all available .Tn ZFS file systems. .It Ar filesystem | mountpoint Unmount the specified filesystem. The command can also be given a path to a .Tn ZFS file system mount point on the system. .El .It Xo .Nm .Cm share .Fl a | Ar filesystem .Xc .Pp Shares .Tn ZFS file systems that have the .Sy sharenfs property set. .Bl -tag -width indent .It Fl a Share all .Tn ZFS file systems that have the .Sy sharenfs property set. This command may be executed on .Fx system startup by .Pa /etc/rc.d/zfs . For more information, see variable .Va zfs_enable in .Xr rc.conf 5 . .It Ar filesystem Share the specified filesystem according to the .Tn sharenfs property. File systems are shared when the .Tn sharenfs property is set. .El .It Xo .Nm .Cm unshare .Fl a | Ar filesystem Ns | Ns Ar mountpoint .Xc .Pp Unshares .Tn ZFS file systems that have the .Tn sharenfs property set. .Bl -tag -width indent .It Fl a Unshares .Tn ZFS file systems that have the .Sy sharenfs property set. This command may be executed on .Fx system shutdown by .Pa /etc/rc.d/zfs . For more information, see variable .Va zfs_enable in .Xr rc.conf 5 . .It Ar filesystem | mountpoint Unshare the specified filesystem. The command can also be given a path to a .Tn ZFS file system shared on the system. .El .It Xo .Nm .Cm bookmark .Ar snapshot .Ar bookmark .Xc .Pp Creates a bookmark of the given snapshot. Bookmarks mark the point in time when the snapshot was created, and can be used as the incremental source for a .Qq Nm Cm send command. .Pp This feature must be enabled to be used. See .Xr zpool-features 7 for details on ZFS feature flags and the .Sy bookmark feature. .It Xo .Nm .Cm send .Op Fl DLPRVcenpv .Op Fl i Ar snapshot | Fl I Ar snapshot .Ar snapshot .Xc .Pp Creates a stream representation of the last .Ar snapshot argument (not part of .Fl i or .Fl I ) which is written to standard output. The output can be redirected to a file or to a different system (for example, using .Xr ssh 1 ) . By default, a full stream is generated. .Bl -tag -width indent .It Fl i Ar snapshot Generate an incremental stream from the first .Ar snapshot Pq the incremental source to the second .Ar snapshot Pq the incremental target . The incremental source can be specified as the last component of the snapshot name .Pq the Em @ No character and following and it is assumed to be from the same file system as the incremental target. .Pp If the destination is a clone, the source may be the origin snapshot, which must be fully specified (for example, .Cm pool/fs@origin , not just .Cm @origin ) . .It Fl I Ar snapshot Generate a stream package that sends all intermediary snapshots from the first .Ar snapshot to the second .Ar snapshot . 
For example, .Ic -I @a fs@d is similar to .Ic -i @a fs@b; -i @b fs@c; -i @c fs@d . The incremental source may be specified as with the .Fl i option. .It Fl R, -replicate Generate a replication stream package, which will replicate the specified filesystem, and all descendent file systems, up to the named snapshot. When received, all properties, snapshots, descendent file systems, and clones are preserved. .Pp If the .Fl i or .Fl I flags are used in conjunction with the .Fl R flag, an incremental replication stream is generated. The current values of properties, and current snapshot and file system names are set when the stream is received. If the .Fl F flag is specified when this stream is received, snapshots and file systems that do not exist on the sending side are destroyed. .It Fl D, -dedup Generate a deduplicated stream. Blocks which would have been sent multiple times in the send stream will only be sent once. The receiving system must also support this feature to receive a deduplicated stream. This flag can be used regardless of the dataset's .Sy dedup property, but performance will be much better if the filesystem uses a dedup-capable checksum (eg. .Sy sha256 ) . .It Fl L, -large-block Generate a stream which may contain blocks larger than 128KB. This flag has no effect if the .Sy large_blocks pool feature is disabled, or if the .Sy recordsize property of this filesystem has never been set above 128KB. The receiving system must have the .Sy large_blocks pool feature enabled as well. See .Xr zpool-features 7 for details on ZFS feature flags and the .Sy large_blocks feature. .It Fl e, -embed Generate a more compact stream by using WRITE_EMBEDDED records for blocks which are stored more compactly on disk by the .Sy embedded_data pool feature. This flag has no effect if the .Sy embedded_data feature is disabled. The receiving system must have the .Sy embedded_data feature enabled. If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. See .Xr zpool-features 7 for details on ZFS feature flags and the .Sy embedded_data feature. .It Fl c, -compressed Generate a more compact stream by using compressed WRITE records for blocks which are compressed on disk and in memory (see the .Sy compression property for details). If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. If the .Sy large_blocks feature is enabled on the sending system but the .Fl L option is not supplied in conjunction with .Fl c then the data will be decompressed before sending so it can be split into smaller block sizes. .It Fl p, -props Include the dataset's properties in the stream. This flag is implicit when .Fl R is specified. The receiving system must also support this feature. .It Fl n, -dryrun Do a dry-run ("No-op") send. Do not generate any actual send data. This is useful in conjunction with the .Fl v or .Fl P flags to determine what data will be sent. In this case, the verbose output will be written to standard output (contrast with a non-dry-run, where the stream is written to standard output and the verbose output goes to standard error). .It Fl P, -parsable Print machine-parsable verbose information about the stream package generated. .It Fl v, -verbose Print verbose information about the stream package generated. This information includes a per-second report of how much data has been sent. 
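.Pp As an illustration (the dataset and snapshot names here are hypothetical), combining .Fl n with .Fl v reports what an incremental send would transfer without generating any stream data: .Bd -literal -offset 2n .Li # Ic zfs send -nv -i pool/home/bob@monday pool/home/bob@tuesday .Ed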
.It Fl V Set the process title to a per-second report of how much data has been sent. .El .Pp The format of the stream is committed. You will be able to receive your streams on future versions of .Tn ZFS . .It Xo .Nm .Cm send -.Op Fl Lce +.Op Fl LPcenv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc .Pp Generate a send stream, which may be of a filesystem, and may be incremental from a bookmark. If the destination is a filesystem or volume, the pool must be read-only, or the filesystem must not be mounted. When the stream generated from a filesystem or volume is received, the default snapshot name will be .Pq --head-- . .Bl -tag -width indent -.It Fl i Ar snapshot Ns | Ns bookmark +.It Fl i Ar snapshot Ns | Ns Ar bookmark Generate an incremental send stream. The incremental source must be an earlier snapshot in the destination's history. It will commonly be an earlier snapshot in the destination's filesystem, in which case it can be specified as the last component of the name .Pq the Em # No or Em @ No character and following . .Pp If the incremental target is a clone, the incremental source can be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc. +.It Fl n, -dryrun +Do a dry-run +.Pq Qq No-op +send. +Do not generate any actual send data. +This is useful in conjunction with the +.Fl v +or +.Fl P +flags to determine what data will be sent. +In this case, the verbose output will be written to standard output +.Po contrast with a non-dry-run, where the stream is written to standard output +and the verbose output goes to standard error +.Pc . +.It Fl v, -verbose +Print verbose information about the stream package generated. +This information includes a per-second report of how much data has been sent. .It Fl L, -large-block Generate a stream which may contain blocks larger than 128KB. This flag has no effect if the .Sy large_blocks pool feature is disabled, or if the .Sy recordsize property of this filesystem has never been set above 128KB. The receiving system must have the .Sy large_blocks pool feature enabled as well. See .Xr zpool-features 7 for details on ZFS feature flags and the .Sy large_blocks feature. +.It Fl P, -parsable +Print machine-parsable verbose information about the stream package generated. .It Fl c, -compressed Generate a more compact stream by using compressed WRITE records for blocks which are compressed on disk and in memory (see the .Sy compression property for details). If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. If the .Sy large_blocks feature is enabled on the sending system but the .Fl L option is not supplied in conjunction with .Fl c then the data will be decompressed before sending so it can be split into smaller block sizes. .It Fl e, -embed Generate a more compact stream by using WRITE_EMBEDDED records for blocks which are stored more compactly on disk by the .Sy embedded_data pool feature. This flag has no effect if the .Sy embedded_data feature is disabled. The receiving system must have the .Sy embedded_data feature enabled. If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. See .Xr zpool-features 7 for details on ZFS feature flags and the .Sy embedded_data feature. .El .It Xo .Nm .Cm send .Op Fl Penv .Fl t .Ar receive_resume_token .Xc Creates a send stream which resumes an interrupted receive. 
The .Ar receive_resume_token is the value of this property on the filesystem or volume that was being received into. See the documentation for .Sy zfs receive -s for more details. .It Xo .Nm .Cm receive Ns | Ns Cm recv .Op Fl vnsFu .Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc .It Xo .Nm .Cm receive Ns | Ns Cm recv .Op Fl vnsFu .Op Fl d | e .Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem .Xc .Pp Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the .Qq Nm Cm send subcommand, which by default creates a full stream. .Qq Nm Cm recv can be used as an alias for .Qq Nm Cm receive . .Pp If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For .Sy zvol Ns s, the destination device link is destroyed and recreated, which means the .Sy zvol cannot be accessed during the .Sy receive operation. .Pp When a snapshot replication package stream that is generated by using the .Qq Nm Cm send Fl R command is received, any snapshots that do not exist on the sending location are destroyed by using the .Qq Nm Cm destroy Fl d command. .Pp The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the .Fl d or .Fl e option. .Pp If the argument is a snapshot name, the specified .Ar snapshot is created. If the argument is a file system or volume name, a snapshot with the same name as the sent snapshot is created within the specified .Ar filesystem or .Ar volume . If the .Fl d or .Fl e option is specified, the snapshot name is determined by appending the sent snapshot's name to the specified .Ar filesystem . If the .Fl d option is specified, all but the pool name of the sent snapshot path is appended (for example, .Sy b/c@1 appended from sent snapshot .Sy a/b/c@1 ) , and if the .Fl e option is specified, only the tail of the sent snapshot path is appended (for example, .Sy c@1 appended from sent snapshot .Sy a/b/c@1 ) . In the case of .Fl d , any file systems needed to replicate the path of the sent snapshot are created within the specified file system. .Bl -tag -width indent .It Fl d Use the full sent snapshot path without the first element (without pool name) to determine the name of the new snapshot as described in the paragraph above. .It Fl e Use only the last element of the sent snapshot path to determine the name of the new snapshot as described in the paragraph above. .It Fl u File system that is associated with the received stream is not mounted. .It Fl v Print verbose information about the stream and the time required to perform the receive operation. .It Fl n Do not actually receive the stream. This can be useful in conjunction with the .Fl v option to verify the name the receive operation would use. .It Fl o Sy origin Ns = Ns Ar snapshot Forces the stream to be received as a clone of the given snapshot. If the stream is a full send stream, this will create the filesystem described by the stream as a clone of the specified snapshot. Which snapshot was specified will not affect the success or failure of the receive, as long as the snapshot does exist. If the stream is an incremental send stream, all the normal verification will be performed. 
.It Fl F Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by .Qq Nm Cm send Fl R Bro Fl i | Fl I Brc ) , destroy snapshots and file systems that do not exist on the sending side. .It Fl s If the receive is interrupted, save the partially received state, rather than deleting it. Interruption may be due to premature termination of the stream .Po e.g. due to network failure or failure of the remote system if the stream is being read over a network connection .Pc , a checksum error in the stream, termination of the .Nm zfs Cm receive process, or unclean shutdown of the system. .Pp The receive can be resumed with a stream generated by .Nm zfs Cm send Fl t Ar token , where the .Ar token is the value of the .Sy receive_resume_token property of the filesystem or volume which is received into. .Pp To use this flag, the storage pool must have the .Sy extensible_dataset feature enabled. See .Xr zpool-features 7 for details on ZFS feature flags. .El .It Xo .Nm .Cm receive Ns | Ns Cm recv .Fl A .Ar filesystem Ns | Ns Ar volume .Xc Abort an interrupted .Nm zfs Cm receive Fl s , deleting its saved partially received state. .It Xo .Nm .Cm allow .Ar filesystem Ns | Ns Ar volume .Xc .Pp Displays permissions that have been delegated on the specified filesystem or volume. See the other forms of .Qq Nm Cm allow for more information. .It Xo .Nm .Cm allow .Op Fl ldug .Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... .Ar perm Ns | Ns Ar @setname Ns .Oo Ns , Ns Ar perm Ns | Ns Ar @setname Oc Ns ... .Ar filesystem Ns | Ns Ar volume .Xc .It Xo .Nm .Cm allow .Op Fl ld .Fl e Ns | Ns Cm everyone .Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... .Ar filesystem Ns | Ns Ar volume .Xc .Pp Delegates .Tn ZFS administration permission for the file systems to non-privileged users. .Bl -tag -width indent .It Xo .Op Fl ug .Ar user Ns | Ns Ar group Ns Oo , Ar user Ns | Ns Ar group Oc Ns ... .Xc Specifies to whom the permissions are delegated. Multiple entities can be specified as a comma-separated list. If neither of the .Fl ug options are specified, then the argument is interpreted preferentially as the keyword .Cm everyone , then as a user name, and lastly as a group name. To specify a user or group named .Qq everyone , use the .Fl u or .Fl g options. To specify a group with the same name as a user, use the .Fl g option. .It Op Fl e Ns | Ns Cm everyone Specifies that the permissions be delegated to .Qq everyone . .It Xo .Ar perm Ns | Ns Ar @setname Ns Oo , Ns Ar perm Ns | Ns Ar @setname Oc Ns ... .Xc The permissions to delegate. Multiple permissions may be specified as a comma-separated list. Permission names are the same as .Tn ZFS subcommand and property names. See the property list below. Property set names, which begin with an at sign .Pq Sy @ , may be specified. See the .Fl s form below for details. .It Xo .Op Fl ld .Ar filesystem Ns | Ns Ar volume .Xc Specifies where the permissions are delegated. If neither of the .Fl ld options are specified, or both are, then the permissions are allowed for the file system or volume, and all of its descendents. If only the .Fl l option is used, then the permissions are allowed "locally" only for the specified file system. If only the .Fl d option is used, then the permissions are allowed only for the descendent file systems. .El .Pp Permissions are generally the ability to use a .Tn ZFS subcommand or change a .Tn ZFS property.
The following permissions are available: .Bl -column -offset 4n "secondarycache" "subcommand" .It NAME Ta TYPE Ta NOTES .It allow Ta subcommand Ta Must Xo also have the permission that is being allowed .Xc .It clone Ta subcommand Ta Must Xo also have the 'create' ability and 'mount' ability in the origin file system .Xc .It create Ta subcommand Ta Must also have the 'mount' ability .It destroy Ta subcommand Ta Must also have the 'mount' ability .It diff Ta subcommand Ta Allows lookup of paths within a dataset given an object number, and the ability to create snapshots necessary to 'zfs diff' .It hold Ta subcommand Ta Allows adding a user hold to a snapshot .It mount Ta subcommand Ta Allows mount/umount of Tn ZFS No datasets .It promote Ta subcommand Ta Must Xo also have the 'mount' and 'promote' ability in the origin file system .Xc .It receive Ta subcommand Ta Must also have the 'mount' and 'create' ability .It release Ta subcommand Ta Allows Xo releasing a user hold which might destroy the snapshot .Xc .It rename Ta subcommand Ta Must Xo also have the 'mount' and 'create' ability in the new parent .Xc .It rollback Ta subcommand Ta Must also have the 'mount' ability .It send Ta subcommand .It share Ta subcommand Ta Allows Xo sharing file systems over the .Tn NFS protocol .Xc .It snapshot Ta subcommand Ta Must also have the 'mount' ability .It groupquota Ta other Ta Allows accessing any groupquota@... property .It groupused Ta other Ta Allows reading any groupused@... property .It userprop Ta other Ta Allows changing any user property .It userquota Ta other Ta Allows accessing any userquota@... property .It userused Ta other Ta Allows reading any userused@... property .It aclinherit Ta property .It aclmode Ta property .It atime Ta property .It canmount Ta property .It casesensitivity Ta property .It checksum Ta property .It compression Ta property .It copies Ta property .It dedup Ta property .It devices Ta property .It exec Ta property .It filesystem_limit Ta property .It logbias Ta property .It jailed Ta property .It mlslabel Ta property .It mountpoint Ta property .It nbmand Ta property .It normalization Ta property .It primarycache Ta property .It quota Ta property .It readonly Ta property .It recordsize Ta property .It refquota Ta property .It refreservation Ta property .It reservation Ta property .It secondarycache Ta property .It setuid Ta property .It sharenfs Ta property .It sharesmb Ta property .It snapdir Ta property .It snapshot_limit Ta property .It sync Ta property .It utf8only Ta property .It version Ta property .It volblocksize Ta property .It volsize Ta property .It vscan Ta property .It xattr Ta property .El .It Xo .Nm .Cm allow .Fl c .Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... .Ar filesystem Ns | Ns Ar volume .Xc .Pp Sets "create time" permissions. These permissions are granted (locally) to the creator of any newly-created descendent file system. .It Xo .Nm .Cm allow .Fl s .Ar @setname .Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... .Ar filesystem Ns | Ns Ar volume .Xc .Pp Defines or adds permissions to a permission set. The set can be used by other .Qq Nm Cm allow commands for the specified file system and its descendents. Sets are evaluated dynamically, so changes to a set are immediately reflected. Permission sets follow the same naming restrictions as ZFS file systems, but the name must begin with an "at sign" .Pq Sy @ , and can be no more than 64 characters long. 
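.Pp For example, the following commands (the set and dataset names are purely illustrative) define a permission set and delegate it to a group: .Bd -literal -offset 2n .Li # Ic zfs allow -s @backup send,snapshot,hold tank/users .Li # Ic zfs allow -g staff @backup tank/users .Ed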
.It Xo .Nm .Cm unallow .Op Fl rldug .Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Xc .It Xo .Nm .Cm unallow .Op Fl rld .Fl e Ns | Ns Cm everyone .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Xc .It Xo .Nm .Cm unallow .Op Fl r .Fl c .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Xc .Pp Removes permissions that were granted with the .Qq Nm Cm allow command. No permissions are explicitly denied, so other permissions granted are still in effect (for example, if the permission is also granted by an ancestor). If no permissions are specified, then all permissions for the specified .Ar user , group , No or everyone are removed. Specifying .Cm everyone .Po or using the Fl e option .Pc only removes the permissions that were granted to everyone , not all permissions for every user and group. See the .Qq Nm Cm allow command for a description of the .Fl ldugec options. .Bl -tag -width indent .It Fl r Recursively remove the permissions from this file system and all descendents. .El .It Xo .Nm .Cm unallow .Op Fl r .Fl s .Ar @setname .Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns .Ns ... Oc .Ar filesystem Ns | Ns Ar volume .Xc .Pp Removes permissions from a permission set. If no permissions are specified, then all permissions are removed, thus removing the set entirely. .It Xo .Nm .Cm hold .Op Fl r .Ar tag snapshot Ns ... .Xc .Pp Adds a single reference, named with the .Ar tag argument, to the specified snapshot or snapshots. Each snapshot has its own tag namespace, and tags must be unique within that space. .Pp If a hold exists on a snapshot, attempts to destroy that snapshot by using the .Qq Nm Cm destroy command return .Em EBUSY . .Bl -tag -width indent .It Fl r Specifies that a hold with the given tag is applied recursively to the snapshots of all descendent file systems. .El .It Xo .Nm .Cm holds .Op Fl Hp .Op Fl r Ns | Ns Fl d Ar depth .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns .Ns ... .Xc .Pp Lists all existing user references for the given dataset or datasets. .Bl -tag -width indent .It Fl H Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space. .It Fl p Display numbers in parsable (exact) values. .It Fl r Lists the holds that are set on the descendent snapshots of the named datasets or snapshots, in addition to listing the holds on the named snapshots, if any. .It Fl d Ar depth Recursively display any holds on the named snapshots, or descendent snapshots of the named datasets or snapshots, limiting the recursion to .Ar depth . .El .It Xo .Nm .Cm release .Op Fl r .Ar tag snapshot Ns ... .Xc .Pp Removes a single reference, named with the .Ar tag argument, from the specified snapshot or snapshots. The tag must already exist for each snapshot. .Bl -tag -width indent .It Fl r Recursively releases a hold with the given tag on the snapshots of all descendent file systems. .El .It Xo .Nm .Cm diff .Op Fl FHt .Ar snapshot .Op Ar snapshot Ns | Ns Ar filesystem .Xc .Pp Display the difference between a snapshot of a given filesystem and another snapshot of that filesystem from a later time or the current contents of the filesystem.
The first column is a character indicating the type of change, the other columns indicate pathname, new pathname .Pq in case of rename , change in link count, and optionally file type and/or change time. .Pp The types of change are: .Bl -column -offset 2n indent .It \&- Ta path was removed .It \&+ Ta path was added .It \&M Ta path was modified .It \&R Ta path was renamed .El .Bl -tag -width indent .It Fl F Display an indication of the type of file, in a manner similar to the .Fl F option of .Xr ls 1 . .Bl -column -offset 2n indent .It \&B Ta block device .It \&C Ta character device .It \&F Ta regular file .It \&/ Ta directory .It \&@ Ta symbolic link .It \&= Ta socket .It \&> Ta door (not supported on Fx ) .It \&| Ta named pipe (not supported on Fx ) .It \&P Ta event port (not supported on Fx ) .El .It Fl H Give more parsable tab-separated output, without header lines and without arrows. .It Fl t Display the path's inode change time as the first column of output. .El .It Xo .Nm .Cm program .Op Fl jn .Op Fl t Ar timeout .Op Fl m Ar memory_limit .Ar pool script .Op Ar arg1 No ... .Xc .Pp Executes .Ar script as a ZFS channel program on .Ar pool . The ZFS channel program interface allows ZFS administrative operations to be run programmatically via a Lua script. The entire script is executed atomically, with no other administrative operations taking effect concurrently. A library of ZFS calls is made available to channel program scripts. Channel programs may only be run with root privileges. .Pp For full documentation of the ZFS channel program interface, see the manual page for .Xr zfs-program 8 . .Bl -tag -width indent .It Fl j Display channel program output in JSON format. When this flag is specified and standard output is empty, the channel program has encountered an error. The details of such an error will be printed to standard error in plain text. .It Fl n Executes a read-only channel program, which runs faster. The program cannot change on-disk state by calling functions from the zfs.sync submodule. The program can be used to gather information such as properties and to determine whether changes would succeed (zfs.check.*). Without this flag, all pending changes must be synced to disk before a channel program can complete. .It Fl t Ar timeout Execution time limit, in milliseconds. If a channel program executes for longer than the provided timeout, it will be stopped and an error will be returned. The default timeout is 1000 ms, and can be set to a maximum of 10000 ms. .It Fl m Ar memory_limit Memory limit, in bytes. If a channel program attempts to allocate more memory than the given limit, it will be stopped and an error returned. The default memory limit is 10 MB, and can be set to a maximum of 100 MB. .Pp All remaining argument strings are passed directly to the channel program as arguments. See .Xr zfs-program 8 for more information. .El .It Xo .Nm .Cm jail .Ar jailid filesystem .Xc .Pp Attaches the specified .Ar filesystem to the jail identified by JID .Ar jailid . From now on, this file system tree can be managed from within a jail if the .Sy jailed property has been set. To use this functionality, the jail needs the .Va allow.mount and .Va allow.mount.zfs parameters set to 1 and the .Va enforce_statfs parameter set to a value lower than 2. .Pp See .Xr jail 8 for more information on managing jails and configuring the parameters above. .It Xo .Nm .Cm unjail .Ar jailid filesystem .Xc .Pp Detaches the specified .Ar filesystem from the jail identified by JID .Ar jailid .
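.Pp For example, the following sequence (the dataset name and the jail ID of 1 are only illustrative) marks a dataset as manageable from within a jail, attaches it to that jail, and later detaches it again: .Bd -literal -offset 2n .Li # Ic zfs set jailed=on tank/users .Li # Ic zfs jail 1 tank/users .Li # Ic zfs unjail 1 tank/users .Ed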
.El .Sh EXIT STATUS The following exit values are returned: .Bl -tag -offset 2n -width 2n .It 0 Successful completion. .It 1 An error occurred. .It 2 Invalid command line options were specified. .El .Sh EXAMPLES .Bl -tag -width 0n .It Sy Example 1 No Creating a Tn ZFS No File System Hierarchy .Pp The following commands create a file system named .Em pool/home and a file system named .Em pool/home/bob . The mount point .Pa /home is set for the parent file system, and is automatically inherited by the child file system. .Bd -literal -offset 2n .Li # Ic zfs create pool/home .Li # Ic zfs set mountpoint=/home pool/home .Li # Ic zfs create pool/home/bob .Ed .It Sy Example 2 No Creating a Tn ZFS No Snapshot .Pp The following command creates a snapshot named .Sy yesterday . This snapshot is mounted on demand in the .Pa \&.zfs/snapshot directory at the root of the .Em pool/home/bob file system. .Bd -literal -offset 2n .Li # Ic zfs snapshot pool/home/bob@yesterday .Ed .It Sy Example 3 No Creating and Destroying Multiple Snapshots .Pp The following command creates snapshots named .Em yesterday of .Em pool/home and all of its descendent file systems. Each snapshot is mounted on demand in the .Pa \&.zfs/snapshot directory at the root of its file system. The second command destroys the newly created snapshots. .Bd -literal -offset 2n .Li # Ic zfs snapshot -r pool/home@yesterday .Li # Ic zfs destroy -r pool/home@yesterday .Ed .It Sy Example 4 No Disabling and Enabling File System Compression .Pp The following command disables the .Sy compression property for all file systems under .Em pool/home . The next command explicitly enables .Sy compression for .Em pool/home/anne . .Bd -literal -offset 2n .Li # Ic zfs set compression=off pool/home .Li # Ic zfs set compression=on pool/home/anne .Ed .It Sy Example 5 No Listing Tn ZFS No Datasets .Pp The following command lists all active file systems and volumes in the system. Snapshots are displayed if the .Sy listsnaps property is .Cm on . The default is .Cm off . See .Xr zpool 8 for more information on pool properties. .Bd -literal -offset 2n .Li # Ic zfs list NAME USED AVAIL REFER MOUNTPOINT pool 450K 457G 18K /pool pool/home 315K 457G 21K /home pool/home/anne 18K 457G 18K /home/anne pool/home/bob 276K 457G 276K /home/bob .Ed .It Sy Example 6 No Setting a Quota on a Tn ZFS No File System .Pp The following command sets a quota of 50 Gbytes for .Em pool/home/bob . .Bd -literal -offset 2n .Li # Ic zfs set quota=50G pool/home/bob .Ed .It Sy Example 7 No Listing Tn ZFS No Properties .Pp The following command lists all properties for .Em pool/home/bob . 
.Bd -literal -offset 2n .Li # Ic zfs get all pool/home/bob NAME PROPERTY VALUE SOURCE pool/home/bob type filesystem - pool/home/bob creation Tue Jul 21 15:53 2009 - pool/home/bob used 21K - pool/home/bob available 20.0G - pool/home/bob referenced 21K - pool/home/bob compressratio 1.00x - pool/home/bob mounted yes - pool/home/bob quota 20G local pool/home/bob reservation none default pool/home/bob recordsize 128K default pool/home/bob mountpoint /home/bob default pool/home/bob sharenfs off default pool/home/bob checksum on default pool/home/bob compression on local pool/home/bob atime on default pool/home/bob devices on default pool/home/bob exec on default pool/home/bob filesystem_limit none default pool/home/bob setuid on default pool/home/bob readonly off default pool/home/bob jailed off default pool/home/bob snapdir hidden default pool/home/bob snapshot_limit none default pool/home/bob aclmode discard default pool/home/bob aclinherit restricted default pool/home/bob canmount on default pool/home/bob xattr on default pool/home/bob copies 1 default pool/home/bob version 5 - pool/home/bob utf8only off - pool/home/bob normalization none - pool/home/bob casesensitivity sensitive - pool/home/bob vscan off default pool/home/bob nbmand off default pool/home/bob sharesmb off default pool/home/bob refquota none default pool/home/bob refreservation none default pool/home/bob primarycache all default pool/home/bob secondarycache all default pool/home/bob usedbysnapshots 0 - pool/home/bob usedbydataset 21K - pool/home/bob usedbychildren 0 - pool/home/bob usedbyrefreservation 0 - pool/home/bob logbias latency default pool/home/bob dedup off default pool/home/bob mlslabel - pool/home/bob sync standard default pool/home/bob refcompressratio 1.00x - .Ed .Pp The following command gets a single property value. .Bd -literal -offset 2n .Li # Ic zfs get -H -o value compression pool/home/bob on .Ed .Pp The following command lists all properties with local settings for .Em pool/home/bob . .Bd -literal -offset 2n .Li # Ic zfs get -s local -o name,property,value all pool/home/bob NAME PROPERTY VALUE pool/home/bob quota 20G pool/home/bob compression on .Ed .It Sy Example 8 No Rolling Back a Tn ZFS No File System .Pp The following command reverts the contents of .Em pool/home/anne to the snapshot named .Em yesterday , deleting all intermediate snapshots. .Bd -literal -offset 2n .Li # Ic zfs rollback -r pool/home/anne@yesterday .Ed .It Sy Example 9 No Creating a Tn ZFS No Clone .Pp The following command creates a writable file system whose initial contents are the same as .Em pool/home/bob@yesterday . 
.Bd -literal -offset 2n .Li # Ic zfs clone pool/home/bob@yesterday pool/clone .Ed .It Sy Example 10 No Promoting a Tn ZFS No Clone .Pp The following commands illustrate how to test out changes to a file system, and then replace the original file system with the changed one, using clones, clone promotion, and renaming: .Bd -literal -offset 2n .Li # Ic zfs create pool/project/production .Ed .Pp Populate .Pa /pool/project/production with data and continue with the following commands: .Bd -literal -offset 2n .Li # Ic zfs snapshot pool/project/production@today .Li # Ic zfs clone pool/project/production@today pool/project/beta .Ed .Pp Now make changes to .Pa /pool/project/beta and continue with the following commands: .Bd -literal -offset 2n .Li # Ic zfs promote pool/project/beta .Li # Ic zfs rename pool/project/production pool/project/legacy .Li # Ic zfs rename pool/project/beta pool/project/production .Ed .Pp Once the legacy version is no longer needed, it can be destroyed. .Bd -literal -offset 2n .Li # Ic zfs destroy pool/project/legacy .Ed .It Sy Example 11 No Inheriting Tn ZFS No Properties .Pp The following command causes .Em pool/home/bob and .Em pool/home/anne to inherit the .Sy checksum property from their parent. .Bd -literal -offset 2n .Li # Ic zfs inherit checksum pool/home/bob pool/home/anne .Ed .It Sy Example 12 No Remotely Replicating Tn ZFS No Data .Pp The following commands send a full stream and then an incremental stream to a remote machine, restoring them into .Sy poolB/received/fs@a and .Sy poolB/received/fs@b , respectively. .Sy poolB must contain the file system .Sy poolB/received , and must not initially contain .Sy poolB/received/fs . .Bd -literal -offset 2n .Li # Ic zfs send pool/fs@a | ssh host zfs receive poolB/received/fs@a .Li # Ic zfs send -i a pool/fs@b | ssh host zfs receive poolB/received/fs .Ed .It Xo .Sy Example 13 Using the .Qq zfs receive -d Option .Xc .Pp The following command sends a full stream of .Sy poolA/fsA/fsB@snap to a remote machine, receiving it into .Sy poolB/received/fsA/fsB@snap . The .Sy fsA/fsB@snap portion of the received snapshot's name is determined from the name of the sent snapshot. .Sy poolB must contain the file system .Sy poolB/received . If .Sy poolB/received/fsA does not exist, it is created as an empty file system. .Bd -literal -offset 2n .Li # Ic zfs send poolA/fsA/fsB@snap | ssh host zfs receive -d poolB/received .Ed .It Sy Example 14 No Setting User Properties .Pp The following example sets the user-defined .Sy com.example:department property for a dataset. .Bd -literal -offset 2n .Li # Ic zfs set com.example:department=12345 tank/accounting .Ed .It Sy Example 15 No Performing a Rolling Snapshot .Pp The following example shows how to maintain a history of snapshots with a consistent naming scheme. 
To keep a week's worth of snapshots, the user destroys the oldest snapshot, renames the remaining snapshots, and then creates a new snapshot, as follows: .Bd -literal -offset 2n .Li # Ic zfs destroy -r pool/users@7daysago .Li # Ic zfs rename -r pool/users@6daysago @7daysago .Li # Ic zfs rename -r pool/users@5daysago @6daysago .Li # Ic zfs rename -r pool/users@4daysago @5daysago .Li # Ic zfs rename -r pool/users@3daysago @4daysago .Li # Ic zfs rename -r pool/users@2daysago @3daysago .Li # Ic zfs rename -r pool/users@yesterday @2daysago .Li # Ic zfs rename -r pool/users@today @yesterday .Li # Ic zfs snapshot -r pool/users@today .Ed .It Xo .Sy Example 16 Setting .Qq sharenfs Property Options on a ZFS File System .Xc .Pp The following command shows how to set .Sy sharenfs property options to enable root access for a specific network on the .Em tank/home file system. The contents of the .Sy sharenfs property are valid .Xr exports 5 options. .Bd -literal -offset 2n .Li # Ic zfs set sharenfs="maproot=root,network 192.168.0.0/24" tank/home .Ed .Pp Another way to write this command with the same result is: .Bd -literal -offset 2n .Li # Ic zfs set sharenfs="-maproot=root -network 192.168.0.0/24" tank/home .Ed .It Xo .Sy Example 17 Delegating .Tn ZFS Administration Permissions on a .Tn ZFS Dataset .Xc .Pp The following example shows how to set permissions so that user .Em cindys can create, destroy, mount, and take snapshots on .Em tank/cindys . The permissions on .Em tank/cindys are also displayed. .Bd -literal -offset 2n .Li # Ic zfs allow cindys create,destroy,mount,snapshot tank/cindys .Li # Ic zfs allow tank/cindys ---- Permissions on tank/cindys -------------------------------------- Local+Descendent permissions: user cindys create,destroy,mount,snapshot .Ed .It Sy Example 18 No Delegating Create Time Permissions on a Tn ZFS No Dataset .Pp The following example shows how to grant anyone in the group .Em staff permission to create file systems in .Em tank/users . This syntax also allows staff members to destroy their own file systems, but not destroy anyone else's file system. The permissions on .Em tank/users are also displayed. .Bd -literal -offset 2n .Li # Ic zfs allow staff create,mount tank/users .Li # Ic zfs allow -c destroy tank/users .Li # Ic zfs allow tank/users ---- Permissions on tank/users --------------------------------------- Permission sets: destroy Local+Descendent permissions: group staff create,mount .Ed .It Xo .Sy Example 19 Defining and Granting a Permission Set on a .Tn ZFS Dataset .Xc .Pp The following example shows how to define and grant a permission set on the .Em tank/users file system. The permissions on .Em tank/users are also displayed. .Bd -literal -offset 2n .Li # Ic zfs allow -s @pset create,destroy,snapshot,mount tank/users .Li # Ic zfs allow staff @pset tank/users .Li # Ic zfs allow tank/users ---- Permissions on tank/users --------------------------------------- Permission sets: @pset create,destroy,mount,snapshot Local+Descendent permissions: group staff @pset .Ed .It Sy Example 20 No Delegating Property Permissions on a Tn ZFS No Dataset .Pp The following example shows how to grant the ability to set quotas and reservations on the .Sy users/home file system. The permissions on .Sy users/home are also displayed.
.Bd -literal -offset 2n .Li # Ic zfs allow cindys quota,reservation users/home .Li # Ic zfs allow users/home ---- Permissions on users/home --------------------------------------- Local+Descendent permissions: user cindys quota,reservation .Li # Ic su - cindys .Li cindys% Ic zfs set quota=10G users/home/marks .Li cindys% Ic zfs get quota users/home/marks NAME PROPERTY VALUE SOURCE users/home/marks quota 10G local .Ed .It Sy Example 21 No Removing ZFS Delegated Permissions on a Tn ZFS No Dataset .Pp The following example shows how to remove the snapshot permission from the .Em staff group on the .Em tank/users file system. The permissions on .Em tank/users are also displayed. .Bd -literal -offset 2n .Li # Ic zfs unallow staff snapshot tank/users .Li # Ic zfs allow tank/users ---- Permissions on tank/users --------------------------------------- Permission sets: @pset create,destroy,mount,snapshot Local+Descendent permissions: group staff @pset .Ed .It Sy Example 22 Showing the differences between a snapshot and a ZFS Dataset .Pp The following example shows how to see what has changed between a prior snapshot of a ZFS Dataset and its current state. The .Fl F option is used to indicate type information for the files affected. .Bd -literal -offset 2n .Li # Ic zfs diff tank/test@before tank/test M / /tank/test/ M F /tank/test/linked (+1) R F /tank/test/oldname -> /tank/test/newname - F /tank/test/deleted + F /tank/test/created M F /tank/test/modified .Ed .El .Sh SEE ALSO .Xr chmod 2 , .Xr fsync 2 , .Xr exports 5 , .Xr fstab 5 , .Xr rc.conf 5 , .Xr jail 8 , .Xr mount 8 , .Xr umount 8 , .Xr zpool 8 .Sh AUTHORS This manual page is a .Xr mdoc 7 reimplementation of the .Tn OpenSolaris manual page .Em zfs(1M) , modified and customized for .Fx and licensed under the Common Development and Distribution License .Pq Tn CDDL . .Pp The .Xr mdoc 7 implementation of this manual page was initially written by .An Martin Matuska Aq mm@FreeBSD.org . Index: projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c =================================================================== --- projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c (revision 352536) +++ projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c (revision 352537) @@ -1,7546 +1,7537 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright 2012 Milan Jurik. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. * Copyright (c) 2012 Martin Matuska . All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. 
* Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Igor Kozhukhov . * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2018 Datto Inc. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef illumos #include #include #include #include #endif #include "zfs_iter.h" #include "zfs_util.h" #include "zfs_comutil.h" libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; static boolean_t log_history = B_TRUE; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); static int zfs_do_destroy(int argc, char **argv); static int zfs_do_get(int argc, char **argv); static int zfs_do_inherit(int argc, char **argv); static int zfs_do_list(int argc, char **argv); static int zfs_do_mount(int argc, char **argv); static int zfs_do_rename(int argc, char **argv); static int zfs_do_rollback(int argc, char **argv); static int zfs_do_set(int argc, char **argv); static int zfs_do_upgrade(int argc, char **argv); static int zfs_do_snapshot(int argc, char **argv); static int zfs_do_unmount(int argc, char **argv); static int zfs_do_share(int argc, char **argv); static int zfs_do_unshare(int argc, char **argv); static int zfs_do_send(int argc, char **argv); static int zfs_do_receive(int argc, char **argv); static int zfs_do_promote(int argc, char **argv); static int zfs_do_userspace(int argc, char **argv); static int zfs_do_allow(int argc, char **argv); static int zfs_do_unallow(int argc, char **argv); static int zfs_do_hold(int argc, char **argv); static int zfs_do_holds(int argc, char **argv); static int zfs_do_release(int argc, char **argv); static int zfs_do_diff(int argc, char **argv); static int zfs_do_jail(int argc, char **argv); static int zfs_do_unjail(int argc, char **argv); static int zfs_do_bookmark(int argc, char **argv); static int zfs_do_remap(int argc, char **argv); static int zfs_do_channel_program(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. */ #ifdef DEBUG const char * _umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } const char * _umem_logging_init(void) { return ("fail,contents"); /* $UMEM_LOGGING setting */ } #endif typedef enum { HELP_CLONE, HELP_CREATE, HELP_DESTROY, HELP_GET, HELP_INHERIT, HELP_UPGRADE, HELP_JAIL, HELP_UNJAIL, HELP_LIST, HELP_MOUNT, HELP_PROMOTE, HELP_RECEIVE, HELP_RENAME, HELP_ROLLBACK, HELP_SEND, HELP_SET, HELP_SHARE, HELP_SNAPSHOT, HELP_UNMOUNT, HELP_UNSHARE, HELP_ALLOW, HELP_UNALLOW, HELP_USERSPACE, HELP_GROUPSPACE, HELP_HOLD, HELP_HOLDS, HELP_RELEASE, HELP_DIFF, HELP_REMAP, HELP_BOOKMARK, HELP_CHANNEL_PROGRAM, } zfs_help_t; typedef struct zfs_command { const char *name; int (*func)(int argc, char **argv); zfs_help_t usage; } zfs_command_t; /* * Master command table. Each ZFS command has a name, associated function, and * usage message. The usage messages need to be internationalized, so we have * to have a function to return the usage message based on a command index. * * These commands are organized according to how they are displayed in the usage * message. An empty command (one with a NULL name) indicates an empty line in * the generic usage message. 
*/ static zfs_command_t command_table[] = { { "create", zfs_do_create, HELP_CREATE }, { "destroy", zfs_do_destroy, HELP_DESTROY }, { NULL }, { "snapshot", zfs_do_snapshot, HELP_SNAPSHOT }, { "rollback", zfs_do_rollback, HELP_ROLLBACK }, { "clone", zfs_do_clone, HELP_CLONE }, { "promote", zfs_do_promote, HELP_PROMOTE }, { "rename", zfs_do_rename, HELP_RENAME }, { "bookmark", zfs_do_bookmark, HELP_BOOKMARK }, { "program", zfs_do_channel_program, HELP_CHANNEL_PROGRAM }, { NULL }, { "list", zfs_do_list, HELP_LIST }, { NULL }, { "set", zfs_do_set, HELP_SET }, { "get", zfs_do_get, HELP_GET }, { "inherit", zfs_do_inherit, HELP_INHERIT }, { "upgrade", zfs_do_upgrade, HELP_UPGRADE }, { "userspace", zfs_do_userspace, HELP_USERSPACE }, { "groupspace", zfs_do_userspace, HELP_GROUPSPACE }, { NULL }, { "mount", zfs_do_mount, HELP_MOUNT }, { "unmount", zfs_do_unmount, HELP_UNMOUNT }, { "share", zfs_do_share, HELP_SHARE }, { "unshare", zfs_do_unshare, HELP_UNSHARE }, { NULL }, { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, { "unallow", zfs_do_unallow, HELP_UNALLOW }, { NULL }, { "hold", zfs_do_hold, HELP_HOLD }, { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, { NULL }, { "jail", zfs_do_jail, HELP_JAIL }, { "unjail", zfs_do_unjail, HELP_UNJAIL }, { "remap", zfs_do_remap, HELP_REMAP }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) zfs_command_t *current_command; static const char * get_usage(zfs_help_t idx) { switch (idx) { case HELP_CLONE: return (gettext("\tclone [-p] [-o property=value] ... " " \n")); case HELP_CREATE: return (gettext("\tcreate [-pu] [-o property=value] ... " "\n" "\tcreate [-ps] [-b blocksize] [-o property=value] ... " "-V \n")); case HELP_DESTROY: return (gettext("\tdestroy [-fnpRrv] \n" "\tdestroy [-dnpRrv] " "@[%][,...]\n" "\tdestroy #\n")); case HELP_GET: return (gettext("\tget [-rHp] [-d max] " "[-o \"all\" | field[,...]]\n" "\t [-t type[,...]] [-s source[,...]]\n" "\t <\"all\" | property[,...]> " "[filesystem|volume|snapshot|bookmark] ...\n")); case HELP_INHERIT: return (gettext("\tinherit [-rS] " " ...\n")); case HELP_UPGRADE: return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); case HELP_JAIL: return (gettext("\tjail \n")); case HELP_UNJAIL: return (gettext("\tunjail \n")); case HELP_LIST: return (gettext("\tlist [-Hp] [-r|-d max] [-o property[,...]] " "[-s property]...\n\t [-S property]... [-t type[,...]] " "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" "\tmount [-vO] [-o opts] <-a | filesystem>\n")); case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: return (gettext("\treceive|recv [-vnsFu] \n" "\treceive|recv [-vnsFu] [-o origin=] [-d | -e] " "\n" "\treceive|recv -A \n")); case HELP_RENAME: return (gettext("\trename [-f] " "\n" "\trename [-f] -p \n" "\trename -r \n" "\trename -u [-p] ")); case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] " "\n" - "\tsend [-Le] [-i snapshot|bookmark] " + "\tsend [-LPcenv] [-i snapshot|bookmark] " "\n" "\tsend [-nvPe] -t \n")); case HELP_SET: return (gettext("\tset ... " " ...\n")); case HELP_SHARE: return (gettext("\tshare <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot|snap [-r] [-o property=value] ... 
" "@ ...\n")); case HELP_UNMOUNT: return (gettext("\tunmount|umount [-f] " "<-a | filesystem|mountpoint>\n")); case HELP_UNSHARE: return (gettext("\tunshare " "<-a | filesystem|mountpoint>\n")); case HELP_ALLOW: return (gettext("\tallow \n" "\tallow [-ldug] " "<\"everyone\"|user|group>[,...] [,...]\n" "\t \n" "\tallow [-ld] -e [,...] " "\n" "\tallow -c [,...] \n" "\tallow -s @setname [,...] " "\n")); case HELP_UNALLOW: return (gettext("\tunallow [-rldug] " "<\"everyone\"|user|group>[,...]\n" "\t [[,...]] \n" "\tunallow [-rld] -e [[,...]] " "\n" "\tunallow [-r] -c [[,...]] " "\n" "\tunallow [-r] -s @setname [[,...]] " "\n")); case HELP_USERSPACE: return (gettext("\tuserspace [-Hinp] [-o field[,...]] " "[-s field] ...\n" "\t [-S field] ... [-t type[,...]] " "\n")); case HELP_GROUPSPACE: return (gettext("\tgroupspace [-Hinp] [-o field[,...]] " "[-s field] ...\n" "\t [-S field] ... [-t type[,...]] " "\n")); case HELP_HOLD: return (gettext("\thold [-r] ...\n")); case HELP_HOLDS: return (gettext("\tholds [-Hp] [-r|-d depth] " " ...\n")); case HELP_RELEASE: return (gettext("\trelease [-r] ...\n")); case HELP_DIFF: return (gettext("\tdiff [-FHt] " "[snapshot|filesystem]\n")); case HELP_REMAP: return (gettext("\tremap \n")); case HELP_BOOKMARK: return (gettext("\tbookmark \n")); case HELP_CHANNEL_PROGRAM: return (gettext("\tprogram [-jn] [-t ] " "[-m ] " "[lua args...]\n")); } abort(); /* NOTREACHED */ } void nomem(void) { (void) fprintf(stderr, gettext("internal error: out of memory\n")); exit(1); } /* * Utility function to guarantee malloc() success. */ void * safe_malloc(size_t size) { void *data; if ((data = calloc(1, size)) == NULL) nomem(); return (data); } void * safe_realloc(void *data, size_t size) { void *newp; if ((newp = realloc(data, size)) == NULL) { free(data); nomem(); } return (newp); } static char * safe_strdup(char *str) { char *dupstr = strdup(str); if (dupstr == NULL) nomem(); return (dupstr); } /* * Callback routine that will print out information for each of * the properties. */ static int usage_prop_cb(int prop, void *cb) { FILE *fp = cb; (void) fprintf(fp, "\t%-15s ", zfs_prop_to_name(prop)); if (zfs_prop_readonly(prop)) (void) fprintf(fp, " NO "); else (void) fprintf(fp, "YES "); if (zfs_prop_inheritable(prop)) (void) fprintf(fp, " YES "); else (void) fprintf(fp, " NO "); if (zfs_prop_values(prop) == NULL) (void) fprintf(fp, "-\n"); else (void) fprintf(fp, "%s\n", zfs_prop_values(prop)); return (ZPROP_CONT); } /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display * a complete usage message. */ static void usage(boolean_t requested) { int i; boolean_t show_properties = B_FALSE; FILE *fp = requested ? 
stdout : stderr; if (current_command == NULL) { (void) fprintf(fp, gettext("usage: zfs command args ...\n")); (void) fprintf(fp, gettext("where 'command' is one of the following:\n\n")); for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) (void) fprintf(fp, "\n"); else (void) fprintf(fp, "%s", get_usage(command_table[i].usage)); } (void) fprintf(fp, gettext("\nEach dataset is of the form: " "pool/[dataset/]*dataset[@name]\n")); } else { (void) fprintf(fp, gettext("usage:\n")); (void) fprintf(fp, "%s", get_usage(current_command->usage)); } if (current_command != NULL && (strcmp(current_command->name, "set") == 0 || strcmp(current_command->name, "get") == 0 || strcmp(current_command->name, "inherit") == 0 || strcmp(current_command->name, "list") == 0)) show_properties = B_TRUE; if (show_properties) { (void) fprintf(fp, gettext("\nThe following properties are supported:\n")); (void) fprintf(fp, "\n\t%-14s %s %s %s\n\n", "PROPERTY", "EDIT", "INHERIT", "VALUES"); /* Iterate over all properties */ (void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_DATASET); (void) fprintf(fp, "\t%-15s ", "userused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "groupused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "userquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "groupquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "written@"); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, gettext("\nSizes are specified in bytes " "with standard units such as K, M, G, etc.\n")); (void) fprintf(fp, gettext("\nUser-defined properties can " "be specified by using a name containing a colon (:).\n")); (void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ " "properties must be appended with\n" "a user or group specifier of one of these forms:\n" " POSIX name (eg: \"matt\")\n" " POSIX id (eg: \"126829\")\n" " SMB name@domain (eg: \"matt@sun\")\n" " SMB SID (eg: \"S-1-234-567-89\")\n")); } else { (void) fprintf(fp, gettext("\nFor the property list, run: %s\n"), "zfs set|get"); (void) fprintf(fp, gettext("\nFor the delegated permission list, run: %s\n"), "zfs allow|unallow"); } /* * See comments at end of main(). */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } exit(requested ? 0 : 2); } /* * Take a property=value argument string and add it to the given nvlist. * Modifies the argument inplace. 
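 *
 * For example, an (illustrative) argument string "compression=lz4" has its
 * '=' overwritten with '\0', leaving "compression" as the nvlist name and
 * "lz4" as its string value; a property that is already present in the
 * nvlist is rejected as specified multiple times.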
*/ static int parseprop(nvlist_t *props, char *propname) { char *propval, *strval; if ((propval = strchr(propname, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for property=value argument\n")); return (-1); } *propval = '\0'; propval++; if (nvlist_lookup_string(props, propname, &strval) == 0) { (void) fprintf(stderr, gettext("property '%s' " "specified multiple times\n"), propname); return (-1); } if (nvlist_add_string(props, propname, propval) != 0) nomem(); return (0); } static int parse_depth(char *opt, int *flags) { char *tmp; int depth; depth = (int)strtol(opt, &tmp, 0); if (*tmp) { (void) fprintf(stderr, gettext("%s is not an integer\n"), opt); usage(B_FALSE); } if (depth < 0) { (void) fprintf(stderr, gettext("Depth can not be negative.\n")); usage(B_FALSE); } *flags |= (ZFS_ITER_DEPTH_LIMIT|ZFS_ITER_RECURSE); return (depth); } #define PROGRESS_DELAY 2 /* seconds */ static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; static time_t pt_begin; static char *pt_header = NULL; static boolean_t pt_shown; static void start_progress_timer(void) { pt_begin = time(NULL) + PROGRESS_DELAY; pt_shown = B_FALSE; } static void set_progress_header(char *header) { assert(pt_header == NULL); pt_header = safe_strdup(header); if (pt_shown) { (void) printf("%s: ", header); (void) fflush(stdout); } } static void update_progress(char *update) { if (!pt_shown && time(NULL) > pt_begin) { int len = strlen(update); (void) printf("%s: %s%*.*s", pt_header, update, len, len, pt_reverse); (void) fflush(stdout); pt_shown = B_TRUE; } else if (pt_shown) { int len = strlen(update); (void) printf("%s%*.*s", update, len, len, pt_reverse); (void) fflush(stdout); } } static void finish_progress(char *done) { if (pt_shown) { (void) printf("%s\n", done); (void) fflush(stdout); } free(pt_header); pt_header = NULL; } /* * Check if the dataset is mountable and should be automatically mounted. */ static boolean_t should_auto_mount(zfs_handle_t *zhp) { if (!zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, zfs_get_type(zhp))) return (B_FALSE); return (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON); } /* * zfs clone [-p] [-o prop=value] ... * * Given an existing dataset, create a writable copy whose initial contents * are the same as the source. The newly created dataset maintains a * dependency on the original; the original cannot be destroyed so long as * the clone exists. * * The '-p' flag creates all the non-existing ancestors of the target first. 
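 *
 * Illustrative usage (dataset and snapshot names are hypothetical):
 *
 *	# zfs clone -p -o mountpoint=/export/web tank/data@monday tank/copies/web
 *
 * This creates any missing ancestors of tank/copies/web before cloning, and
 * the new clone is then mounted and shared unless its canmount setting
 * prevents automatic mounting.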
*/ static int zfs_do_clone(int argc, char **argv) { zfs_handle_t *zhp = NULL; boolean_t parents = B_FALSE; nvlist_t *props; int ret = 0; int c; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, "o:p")) != -1) { switch (c) { case 'o': if (parseprop(props, optarg) != 0) return (1); break; case 'p': parents = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); goto usage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); goto usage; } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); goto usage; } /* open the source dataset */ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); if (parents && zfs_name_valid(argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { /* * Now create the ancestors of the target dataset. If the * target already exists and '-p' option was used we should not * complain. */ if (zfs_dataset_exists(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) return (0); if (zfs_create_ancestors(g_zfs, argv[1]) != 0) return (1); } /* pass to libzfs */ ret = zfs_clone(zhp, argv[1], props); /* create the mountpoint if necessary */ if (ret == 0) { zfs_handle_t *clone; clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET); if (clone != NULL) { /* * If the user doesn't want the dataset * automatically mounted, then skip the mount/share * step. */ if (should_auto_mount(clone)) { if ((ret = zfs_mount(clone, NULL, 0)) != 0) { (void) fprintf(stderr, gettext("clone " "successfully created, " "but not mounted\n")); } else if ((ret = zfs_share(clone)) != 0) { (void) fprintf(stderr, gettext("clone " "successfully created, " "but not shared\n")); } } zfs_close(clone); } } zfs_close(zhp); nvlist_free(props); return (!!ret); usage: if (zhp) zfs_close(zhp); nvlist_free(props); usage(B_FALSE); return (-1); } /* * zfs create [-pu] [-o prop=value] ... fs * zfs create [-ps] [-b blocksize] [-o prop=value] ... -V vol size * * Create a new dataset. This command can be used to create filesystems * and volumes. Snapshot creation is handled by 'zfs snapshot'. * For volumes, the user must specify a size to be used. * * The '-s' flag applies only to volumes, and indicates that we should not try * to set the reservation for this volume. By default we set a reservation * equal to the size for any volume. For pools with SPA_VERSION >= * SPA_VERSION_REFRESERVATION, we set a refreservation instead. * * The '-p' flag creates all the non-existing ancestors of the target first. * * The '-u' flag prevents mounting of newly created file system. 
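 *
 * Illustrative usage (names and sizes are hypothetical):
 *
 *	# zfs create -p -o compression=on tank/projects/src
 *	# zfs create -s -V 10G tank/vols/scratch
 *
 * The first creates a filesystem together with its missing ancestors; the
 * second creates a sparse 10G volume, skipping the (ref)reservation it
 * would otherwise be given.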
*/ static int zfs_do_create(int argc, char **argv) { zfs_type_t type = ZFS_TYPE_FILESYSTEM; zfs_handle_t *zhp = NULL; uint64_t volsize = 0; int c; boolean_t noreserve = B_FALSE; boolean_t bflag = B_FALSE; boolean_t parents = B_FALSE; boolean_t nomount = B_FALSE; int ret = 1; nvlist_t *props; uint64_t intval; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, ":V:b:so:pu")) != -1) { switch (c) { case 'V': type = ZFS_TYPE_VOLUME; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0) nomem(); volsize = intval; break; case 'p': parents = B_TRUE; break; case 'b': bflag = B_TRUE; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "block size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), intval) != 0) nomem(); break; case 'o': if (parseprop(props, optarg) != 0) goto error; break; case 's': noreserve = B_TRUE; break; case 'u': nomount = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing size " "argument\n")); goto badusage; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto badusage; } } if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) { (void) fprintf(stderr, gettext("'-s' and '-b' can only be " "used when creating a volume\n")); goto badusage; } if (nomount && type != ZFS_TYPE_FILESYSTEM) { (void) fprintf(stderr, gettext("'-u' can only be " "used when creating a file system\n")); goto badusage; } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing %s argument\n"), zfs_type_to_name(type)); goto badusage; } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); goto badusage; } if (type == ZFS_TYPE_VOLUME && !noreserve) { zpool_handle_t *zpool_handle; nvlist_t *real_props = NULL; uint64_t spa_version; char *p; zfs_prop_t resv_prop; char *strval; char msg[1024]; if ((p = strchr(argv[0], '/')) != NULL) *p = '\0'; zpool_handle = zpool_open(g_zfs, argv[0]); if (p != NULL) *p = '/'; if (zpool_handle == NULL) goto error; spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); if (spa_version >= SPA_VERSION_REFRESERVATION) resv_prop = ZFS_PROP_REFRESERVATION; else resv_prop = ZFS_PROP_RESERVATION; (void) snprintf(msg, sizeof (msg), gettext("cannot create '%s'"), argv[0]); if (props && (real_props = zfs_valid_proplist(g_zfs, type, props, 0, NULL, zpool_handle, msg)) == NULL) { zpool_close(zpool_handle); goto error; } zpool_close(zpool_handle); volsize = zvol_volsize_to_reservation(volsize, real_props); nvlist_free(real_props); if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), &strval) != 0) { if (nvlist_add_uint64(props, zfs_prop_to_name(resv_prop), volsize) != 0) { nvlist_free(props); nomem(); } } } if (parents && zfs_name_valid(argv[0], type)) { /* * Now create the ancestors of target dataset. If the target * already exists and '-p' option was used we should not * complain. 
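 * For instance, a (hypothetical) 'zfs create -p tank/a/b/c' simply returns
 * success when tank/a/b/c already exists.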
*/ if (zfs_dataset_exists(g_zfs, argv[0], type)) { ret = 0; goto error; } if (zfs_create_ancestors(g_zfs, argv[0]) != 0) goto error; } /* pass to libzfs */ if (zfs_create(g_zfs, argv[0], type, props) != 0) goto error; if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) goto error; ret = 0; /* * Mount and/or share the new filesystem as appropriate. We provide a * verbose error message to let the user know that their filesystem was * in fact created, even if we failed to mount or share it. * If the user doesn't want the dataset automatically mounted, * then skip the mount/share step altogether. */ if (!nomount && should_auto_mount(zhp)) { if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not mounted\n")); ret = 1; } else if (zfs_share(zhp) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not shared\n")); ret = 1; } } error: if (zhp) zfs_close(zhp); nvlist_free(props); return (ret); badusage: nvlist_free(props); usage(B_FALSE); return (2); } /* * zfs destroy [-rRf] * zfs destroy [-rRd] * * -r Recursively destroy all children * -R Recursively destroy all dependents, including clones * -f Force unmounting of any dependents * -d If we can't destroy now, mark for deferred destruction * * Destroys the given dataset. By default, it will unmount any filesystems, * and refuse to destroy a dataset that has any dependents. A dependent can * either be a child, or a clone of a child. */ typedef struct destroy_cbdata { boolean_t cb_first; boolean_t cb_force; boolean_t cb_recurse; boolean_t cb_error; boolean_t cb_doclones; zfs_handle_t *cb_target; boolean_t cb_defer_destroy; boolean_t cb_verbose; boolean_t cb_parsable; boolean_t cb_dryrun; nvlist_t *cb_nvl; nvlist_t *cb_batchedsnaps; /* first snap in contiguous run */ char *cb_firstsnap; /* previous snap in contiguous run */ char *cb_prevsnap; int64_t cb_snapused; char *cb_snapspec; char *cb_bookmark; } destroy_cbdata_t; /* * Check for any dependents based on the '-r' or '-R' flags. */ static int destroy_check_dependent(zfs_handle_t *zhp, void *data) { destroy_cbdata_t *cbp = data; const char *tname = zfs_get_name(cbp->cb_target); const char *name = zfs_get_name(zhp); if (strncmp(tname, name, strlen(tname)) == 0 && (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) { /* * This is a direct descendant, not a clone somewhere else in * the hierarchy. */ if (cbp->cb_recurse) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has children\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-r' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; cbp->cb_error = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } else { /* * This is a clone. We only want to report this if the '-r' * wasn't specified, or the target is a snapshot. 
*/ if (!cbp->cb_recurse && zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has dependent clones\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-R' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; cbp->cb_error = B_TRUE; cbp->cb_dryrun = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } out: zfs_close(zhp); return (0); } static int destroy_callback(zfs_handle_t *zhp, void *data) { destroy_cbdata_t *cb = data; const char *name = zfs_get_name(zhp); if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); } else if (cb->cb_dryrun) { (void) printf(gettext("would destroy %s\n"), name); } else { (void) printf(gettext("will destroy %s\n"), name); } } /* * Ignore pools (which we've already flagged as an error before getting * here). */ if (strchr(zfs_get_name(zhp), '/') == NULL && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { zfs_close(zhp); return (0); } if (cb->cb_dryrun) { zfs_close(zhp); return (0); } /* * We batch up all contiguous snapshots (even of different * filesystems) and destroy them with one ioctl. We can't * simply do all snap deletions and then all fs deletions, * because we must delete a clone before its origin. */ if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) { fnvlist_add_boolean(cb->cb_batchedsnaps, name); } else { int error = zfs_destroy_snaps_nvl(g_zfs, cb->cb_batchedsnaps, B_FALSE); fnvlist_free(cb->cb_batchedsnaps); cb->cb_batchedsnaps = fnvlist_alloc(); if (error != 0 || zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { zfs_close(zhp); return (-1); } } zfs_close(zhp); return (0); } static int destroy_print_cb(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; const char *name = zfs_get_name(zhp); int err = 0; if (nvlist_exists(cb->cb_nvl, name)) { if (cb->cb_firstsnap == NULL) cb->cb_firstsnap = strdup(name); if (cb->cb_prevsnap != NULL) free(cb->cb_prevsnap); /* this snap continues the current range */ cb->cb_prevsnap = strdup(name); if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) nomem(); if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); } else if (cb->cb_dryrun) { (void) printf(gettext("would destroy %s\n"), name); } else { (void) printf(gettext("will destroy %s\n"), name); } } } else if (cb->cb_firstsnap != NULL) { /* end of this range */ uint64_t used = 0; err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); cb->cb_snapused += used; free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } zfs_close(zhp); return (err); } static int destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) { int err = 0; assert(cb->cb_firstsnap == NULL); assert(cb->cb_prevsnap == NULL); err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb); if (cb->cb_firstsnap != NULL) { uint64_t used = 0; if (err == 0) { err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); } cb->cb_snapused += used; free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } return (err); } static int snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; int err = 0; /* Check for clones. 
*/ if (!cb->cb_doclones && !cb->cb_defer_destroy) { cb->cb_target = zhp; cb->cb_first = B_TRUE; err = zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, cb); } if (err == 0) { if (nvlist_add_boolean(cb->cb_nvl, zfs_get_name(zhp))) nomem(); } zfs_close(zhp); return (err); } static int gather_snapshots(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; int err = 0; err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); if (err == ENOENT) err = 0; if (err != 0) goto out; if (cb->cb_verbose) { err = destroy_print_snapshots(zhp, cb); if (err != 0) goto out; } if (cb->cb_recurse) err = zfs_iter_filesystems(zhp, gather_snapshots, cb); out: zfs_close(zhp); return (err); } static int destroy_clones(destroy_cbdata_t *cb) { nvpair_t *pair; for (pair = nvlist_next_nvpair(cb->cb_nvl, NULL); pair != NULL; pair = nvlist_next_nvpair(cb->cb_nvl, pair)) { zfs_handle_t *zhp = zfs_open(g_zfs, nvpair_name(pair), ZFS_TYPE_SNAPSHOT); if (zhp != NULL) { boolean_t defer = cb->cb_defer_destroy; int err = 0; /* * We can't defer destroy non-snapshots, so set it to * false while destroying the clones. */ cb->cb_defer_destroy = B_FALSE; err = zfs_iter_dependents(zhp, B_FALSE, destroy_callback, cb); cb->cb_defer_destroy = defer; zfs_close(zhp); if (err != 0) return (err); } } return (0); } static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; int rv = 0; int err = 0; int c; zfs_handle_t *zhp = NULL; char *at, *pound; zfs_type_t type = ZFS_TYPE_DATASET; /* check options */ while ((c = getopt(argc, argv, "vpndfrR")) != -1) { switch (c) { case 'v': cb.cb_verbose = B_TRUE; break; case 'p': cb.cb_verbose = B_TRUE; cb.cb_parsable = B_TRUE; break; case 'n': cb.cb_dryrun = B_TRUE; break; case 'd': cb.cb_defer_destroy = B_TRUE; type = ZFS_TYPE_SNAPSHOT; break; case 'f': cb.cb_force = B_TRUE; break; case 'r': cb.cb_recurse = B_TRUE; break; case 'R': cb.cb_recurse = B_TRUE; cb.cb_doclones = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } at = strchr(argv[0], '@'); pound = strchr(argv[0], '#'); if (at != NULL) { /* Build the list of snaps to destroy in cb_nvl. 
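 * The snapshot spec after '@' may name several snapshots at once, e.g. a
 * (hypothetical) 'zfs destroy tank/fs@a,b' or a range such as
 * 'tank/fs@a%d'; gather_snapshots() expands it via zfs_iter_snapspec().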
*/ cb.cb_nvl = fnvlist_alloc(); *at = '\0'; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); cb.cb_snapspec = at + 1; if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || cb.cb_error) { rv = 1; goto out; } if (nvlist_empty(cb.cb_nvl)) { (void) fprintf(stderr, gettext("could not find any " "snapshots to destroy; check snapshot names.\n")); rv = 1; goto out; } if (cb.cb_verbose) { char buf[16]; zfs_nicenum(cb.cb_snapused, buf, sizeof (buf)); if (cb.cb_parsable) { (void) printf("reclaim\t%llu\n", cb.cb_snapused); } else if (cb.cb_dryrun) { (void) printf(gettext("would reclaim %s\n"), buf); } else { (void) printf(gettext("will reclaim %s\n"), buf); } } if (!cb.cb_dryrun) { if (cb.cb_doclones) { cb.cb_batchedsnaps = fnvlist_alloc(); err = destroy_clones(&cb); if (err == 0) { err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_batchedsnaps, B_FALSE); } if (err != 0) { rv = 1; goto out; } } if (err == 0) { err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl, cb.cb_defer_destroy); } } if (err != 0) rv = 1; } else if (pound != NULL) { int err; nvlist_t *nvl; if (cb.cb_dryrun) { (void) fprintf(stderr, "dryrun is not supported with bookmark\n"); return (-1); } if (cb.cb_defer_destroy) { (void) fprintf(stderr, "defer destroy is not supported with bookmark\n"); return (-1); } if (cb.cb_recurse) { (void) fprintf(stderr, "recursive is not supported with bookmark\n"); return (-1); } if (!zfs_bookmark_exists(argv[0])) { (void) fprintf(stderr, gettext("bookmark '%s' " "does not exist.\n"), argv[0]); return (1); } nvl = fnvlist_alloc(); fnvlist_add_boolean(nvl, argv[0]); err = lzc_destroy_bookmarks(nvl, NULL); if (err != 0) { (void) zfs_standard_error(g_zfs, err, "cannot destroy bookmark"); } nvlist_free(cb.cb_nvl); return (err); } else { /* Open the given dataset */ if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) return (1); cb.cb_target = zhp; /* * Perform an explicit check for pools before going any further. */ if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "operation does not apply to pools\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zfs destroy -r " "%s' to destroy all datasets in the pool\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zpool destroy %s' " "to destroy the pool itself\n"), zfs_get_name(zhp)); rv = 1; goto out; } /* * Check for any dependents and/or clones. */ cb.cb_first = B_TRUE; if (!cb.cb_doclones && zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, &cb) != 0) { rv = 1; goto out; } if (cb.cb_error) { rv = 1; goto out; } cb.cb_batchedsnaps = fnvlist_alloc(); if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) { rv = 1; goto out; } /* * Do the real thing. The callback will close the * handle regardless of whether it succeeds or not. */ err = destroy_callback(zhp, &cb); zhp = NULL; if (err == 0) { err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_batchedsnaps, cb.cb_defer_destroy); } if (err != 0) rv = 1; } out: fnvlist_free(cb.cb_batchedsnaps); fnvlist_free(cb.cb_nvl); if (zhp != NULL) zfs_close(zhp); return (rv); } static boolean_t is_recvd_column(zprop_get_cbdata_t *cbp) { int i; zfs_get_column_t col; for (i = 0; i < ZFS_GET_NCOLS && (col = cbp->cb_columns[i]) != GET_COL_NONE; i++) if (col == GET_COL_RECVD) return (B_TRUE); return (B_FALSE); } /* * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] * < all | property[,property]... > < fs | snap | vol > ... 
* * -r recurse over any child datasets * -H scripted mode. Headers are stripped, and fields are separated * by tabs instead of spaces. * -o Set of fields to display. One of "name,property,value, * received,source". Default is "name,property,value,source". * "all" is an alias for all five. * -s Set of sources to allow. One of * "local,default,inherited,received,temporary,none". Default is * all six. * -p Display values in parsable (literal) format. * * Prints properties for the given datasets. The user can control which * columns to display as well as which property types to allow. */ /* * Invoked to display the properties for a single dataset. */ static int get_callback(zfs_handle_t *zhp, void *data) { char buf[ZFS_MAXPROPLEN]; char rbuf[ZFS_MAXPROPLEN]; zprop_source_t sourcetype; char source[ZFS_MAX_DATASET_NAME_LEN]; zprop_get_cbdata_t *cbp = data; nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; nvlist_t *propval; char *strval; char *sourceval; boolean_t received = is_recvd_column(cbp); for (; pl != NULL; pl = pl->pl_next) { char *recvdval = NULL; /* * Skip the special fake placeholder. This will also skip over * the name property when 'all' is specified. */ if (pl->pl_prop == ZFS_PROP_NAME && pl == cbp->cb_proplist) continue; if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, buf, sizeof (buf), &sourcetype, source, sizeof (source), cbp->cb_literal) != 0) { if (pl->pl_all) continue; if (!zfs_prop_valid_for_type(pl->pl_prop, ZFS_TYPE_DATASET)) { (void) fprintf(stderr, gettext("No such property '%s'\n"), zfs_prop_to_name(pl->pl_prop)); continue; } sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } if (received && (zfs_prop_get_recvd(zhp, zfs_prop_to_name(pl->pl_prop), rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, zfs_prop_to_name(pl->pl_prop), buf, sourcetype, source, recvdval); } else if (zfs_prop_userquota(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, buf, sizeof (buf), cbp->cb_literal) != 0) { sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); } else if (zfs_prop_written(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_written(zhp, pl->pl_user_prop, buf, sizeof (buf), cbp->cb_literal) != 0) { sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); } else { if (nvlist_lookup_nvlist(user_props, pl->pl_user_prop, &propval) != 0) { if (pl->pl_all) continue; sourcetype = ZPROP_SRC_NONE; strval = "-"; } else { verify(nvlist_lookup_string(propval, ZPROP_VALUE, &strval) == 0); verify(nvlist_lookup_string(propval, ZPROP_SOURCE, &sourceval) == 0); if (strcmp(sourceval, zfs_get_name(zhp)) == 0) { sourcetype = ZPROP_SRC_LOCAL; } else if (strcmp(sourceval, ZPROP_SOURCE_VAL_RECVD) == 0) { sourcetype = ZPROP_SRC_RECEIVED; } else { sourcetype = ZPROP_SRC_INHERITED; (void) strlcpy(source, sourceval, sizeof (source)); } } if (received && (zfs_prop_get_recvd(zhp, pl->pl_user_prop, rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, strval, sourcetype, source, recvdval); } } return (0); } static int zfs_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; int i, c, flags = 
ZFS_ITER_ARGS_CAN_BE_PATHS; int types = ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK; char *value, *fields; int ret = 0; int limit = 0; zprop_list_t fake_name = { 0 }; /* * Set up default columns and sources. */ cb.cb_sources = ZPROP_SRC_ALL; cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_DATASET; /* check options */ while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': cb.cb_scripted = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'o': /* * Process the set of columns to display. We zero out * the structure to give us a blank slate. */ bzero(&cb.cb_columns, sizeof (cb.cb_columns)); i = 0; while (*optarg != '\0') { static char *col_subopts[] = { "name", "property", "value", "received", "source", "all", NULL }; if (i == ZFS_GET_NCOLS) { (void) fprintf(stderr, gettext("too " "many fields given to -o " "option\n")); usage(B_FALSE); } switch (getsubopt(&optarg, col_subopts, &value)) { case 0: cb.cb_columns[i++] = GET_COL_NAME; break; case 1: cb.cb_columns[i++] = GET_COL_PROPERTY; break; case 2: cb.cb_columns[i++] = GET_COL_VALUE; break; case 3: cb.cb_columns[i++] = GET_COL_RECVD; flags |= ZFS_ITER_RECVD_PROPS; break; case 4: cb.cb_columns[i++] = GET_COL_SOURCE; break; case 5: if (i > 0) { (void) fprintf(stderr, gettext("\"all\" conflicts " "with specific fields " "given to -o option\n")); usage(B_FALSE); } cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_RECVD; cb.cb_columns[4] = GET_COL_SOURCE; flags |= ZFS_ITER_RECVD_PROPS; i = ZFS_GET_NCOLS; break; default: (void) fprintf(stderr, gettext("invalid column name " "'%s'\n"), suboptarg); usage(B_FALSE); } } break; case 's': cb.cb_sources = 0; while (*optarg != '\0') { static char *source_subopts[] = { "local", "default", "inherited", "received", "temporary", "none", NULL }; switch (getsubopt(&optarg, source_subopts, &value)) { case 0: cb.cb_sources |= ZPROP_SRC_LOCAL; break; case 1: cb.cb_sources |= ZPROP_SRC_DEFAULT; break; case 2: cb.cb_sources |= ZPROP_SRC_INHERITED; break; case 3: cb.cb_sources |= ZPROP_SRC_RECEIVED; break; case 4: cb.cb_sources |= ZPROP_SRC_TEMPORARY; break; case 5: cb.cb_sources |= ZPROP_SRC_NONE; break; default: (void) fprintf(stderr, gettext("invalid source " "'%s'\n"), suboptarg); usage(B_FALSE); } } break; case 't': types = 0; flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", "volume", "snapshot", "bookmark", "all", NULL }; switch (getsubopt(&optarg, type_subopts, &value)) { case 0: types |= ZFS_TYPE_FILESYSTEM; break; case 1: types |= ZFS_TYPE_VOLUME; break; case 2: types |= ZFS_TYPE_SNAPSHOT; break; case 3: types |= ZFS_TYPE_BOOKMARK; break; case 4: types = ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK; break; default: (void) fprintf(stderr, gettext("invalid type '%s'\n"), suboptarg); usage(B_FALSE); } } break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing property " "argument\n")); usage(B_FALSE); } fields = argv[0]; if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) 
usage(B_FALSE); argc--; argv++; /* * As part of zfs_expand_proplist(), we keep track of the maximum column * width for each property. For the 'NAME' (and 'SOURCE') columns, we * need to know the maximum name length. However, the user likely did * not specify 'name' as one of the properties to fetch, so we need to * make sure we always include at least this property for * print_get_headers() to work properly. */ if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZFS_PROP_NAME; fake_name.pl_width = strlen(gettext("NAME")); fake_name.pl_next = cb.cb_proplist; cb.cb_proplist = &fake_name; } cb.cb_first = B_TRUE; /* run for each object */ ret = zfs_for_each(argc, argv, flags, types, NULL, &cb.cb_proplist, limit, get_callback, &cb); if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); else zprop_free_list(cb.cb_proplist); return (ret); } /* * inherit [-rS] ... * * -r Recurse over all children * -S Revert to received value, if any * * For each dataset specified on the command line, inherit the given property * from its parent. Inheriting a property at the pool level will cause it to * use the default value. The '-r' flag will recurse over all children, and is * useful for setting a property on a hierarchy-wide basis, regardless of any * local modifications for each dataset. */ typedef struct inherit_cbdata { const char *cb_propname; boolean_t cb_received; } inherit_cbdata_t; static int inherit_recurse_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; zfs_prop_t prop = zfs_name_to_prop(cb->cb_propname); /* * If we're doing it recursively, then ignore properties that * are not valid for this type of dataset. */ if (prop != ZPROP_INVAL && !zfs_prop_valid_for_type(prop, zfs_get_type(zhp))) return (0); return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int inherit_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int zfs_do_inherit(int argc, char **argv) { int c; zfs_prop_t prop; inherit_cbdata_t cb = { 0 }; char *propname; int ret = 0; int flags = 0; boolean_t received = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "rS")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'S': received = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing property argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } propname = argv[0]; argc--; argv++; if ((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { if (zfs_prop_readonly(prop)) { (void) fprintf(stderr, gettext( "%s property is read-only\n"), propname); return (1); } if (!zfs_prop_inheritable(prop) && !received) { (void) fprintf(stderr, gettext("'%s' property cannot " "be inherited\n"), propname); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) { (void) fprintf(stderr, gettext("use 'zfs set " "%s=none' to clear\n"), propname); (void) fprintf(stderr, gettext("use 'zfs " "inherit -S %s' to revert to received " "value\n"), propname); } return (1); } if (received && (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION)) { (void) fprintf(stderr, gettext("'%s' property cannot " "be reverted to a received value\n"), propname); return (1); } } 
else if (!zfs_prop_user(propname)) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), propname); usage(B_FALSE); } cb.cb_propname = propname; cb.cb_received = received; if (flags & ZFS_ITER_RECURSE) { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_recurse_cb, &cb); } else { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_cb, &cb); } return (ret); } typedef struct upgrade_cbdata { uint64_t cb_numupgraded; uint64_t cb_numsamegraded; uint64_t cb_numfailed; uint64_t cb_version; boolean_t cb_newer; boolean_t cb_foundone; char cb_lastfs[ZFS_MAX_DATASET_NAME_LEN]; } upgrade_cbdata_t; static int same_pool(zfs_handle_t *zhp, const char *name) { int len1 = strcspn(name, "/@"); const char *zhname = zfs_get_name(zhp); int len2 = strcspn(zhname, "/@"); if (len1 != len2) return (B_FALSE); return (strncmp(name, zhname, len1) == 0); } static int upgrade_list_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); /* list if it's old/new */ if ((!cb->cb_newer && version < ZPL_VERSION) || (cb->cb_newer && version > ZPL_VERSION)) { char *str; if (cb->cb_newer) { str = gettext("The following filesystems are " "formatted using a newer software version and\n" "cannot be accessed on the current system.\n\n"); } else { str = gettext("The following filesystems are " "out of date, and can be upgraded. After being\n" "upgraded, these filesystems (and any 'zfs send' " "streams generated from\n" "subsequent snapshots) will no longer be " "accessible by older software versions.\n\n"); } if (!cb->cb_foundone) { (void) puts(str); (void) printf(gettext("VER FILESYSTEM\n")); (void) printf(gettext("--- ------------\n")); cb->cb_foundone = B_TRUE; } (void) printf("%2u %s\n", version, zfs_get_name(zhp)); } return (0); } static int upgrade_set_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); int needed_spa_version; int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); needed_spa_version = zfs_spa_version_map(cb->cb_version); if (needed_spa_version < 0) return (-1); if (spa_version < needed_spa_version) { /* can't upgrade */ (void) printf(gettext("%s: can not be " "upgraded; the pool version needs to first " "be upgraded\nto version %d\n\n"), zfs_get_name(zhp), needed_spa_version); cb->cb_numfailed++; return (0); } /* upgrade */ if (version < cb->cb_version) { char verstr[16]; (void) snprintf(verstr, sizeof (verstr), "%llu", cb->cb_version); if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) { /* * If they did "zfs upgrade -a", then we could * be doing ioctls to different pools. We need * to log this history once to each pool, and bypass * the normal history logging that happens in main(). 
*/ (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; } if (zfs_prop_set(zhp, "version", verstr) == 0) cb->cb_numupgraded++; else cb->cb_numfailed++; (void) strcpy(cb->cb_lastfs, zfs_get_name(zhp)); } else if (version > cb->cb_version) { /* can't downgrade */ (void) printf(gettext("%s: can not be downgraded; " "it is already at version %u\n"), zfs_get_name(zhp), version); cb->cb_numfailed++; } else { cb->cb_numsamegraded++; } return (0); } /* * zfs upgrade * zfs upgrade -v * zfs upgrade [-r] [-V ] <-a | filesystem> */ static int zfs_do_upgrade(int argc, char **argv) { boolean_t all = B_FALSE; boolean_t showversions = B_FALSE; int ret = 0; upgrade_cbdata_t cb = { 0 }; int c; int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, "rvV:a")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'v': showversions = B_TRUE; break; case 'V': if (zfs_prop_string_to_index(ZFS_PROP_VERSION, optarg, &cb.cb_version) != 0) { (void) fprintf(stderr, gettext("invalid version %s\n"), optarg); usage(B_FALSE); } break; case 'a': all = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if ((!all && !argc) && ((flags & ZFS_ITER_RECURSE) | cb.cb_version)) usage(B_FALSE); if (showversions && (flags & ZFS_ITER_RECURSE || all || cb.cb_version || argc)) usage(B_FALSE); if ((all || argc) && (showversions)) usage(B_FALSE); if (all && argc) usage(B_FALSE); if (showversions) { /* Show info on available versions. */ (void) printf(gettext("The following filesystem versions are " "supported:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" "---------------\n"); (void) printf(gettext(" 1 Initial ZFS filesystem version\n")); (void) printf(gettext(" 2 Enhanced directory entries\n")); (void) printf(gettext(" 3 Case insensitive and filesystem " "user identifier (FUID)\n")); (void) printf(gettext(" 4 userquota, groupquota " "properties\n")); (void) printf(gettext(" 5 System attributes\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases,\n")); (void) printf("see the ZFS Administration Guide.\n\n"); ret = 0; } else if (argc || all) { /* Upgrade filesystems */ if (cb.cb_version == 0) cb.cb_version = ZPL_VERSION; ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_set_callback, &cb); (void) printf(gettext("%llu filesystems upgraded\n"), cb.cb_numupgraded); if (cb.cb_numsamegraded) { (void) printf(gettext("%llu filesystems already at " "this version\n"), cb.cb_numsamegraded); } if (cb.cb_numfailed != 0) ret = 1; } else { /* List old-version filesystems */ boolean_t found; (void) printf(gettext("This system is currently running " "ZFS filesystem version %llu.\n\n"), ZPL_VERSION); flags |= ZFS_ITER_RECURSE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); found = cb.cb_foundone; cb.cb_foundone = B_FALSE; cb.cb_newer = B_TRUE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); if (!cb.cb_foundone && !found) { (void) printf(gettext("All filesystems are " "formatted with the current version.\n")); } } return (ret); } /* * zfs userspace [-Hinp] [-o field[,...]] [-s field [-s field]...] * [-S field [-S field]...] [-t type[,...]] filesystem | snapshot * zfs groupspace [-Hinp] [-o field[,...]] [-s field [-s field]...] 
* [-S field [-S field]...] [-t type[,...]] filesystem | snapshot * * -H Scripted mode; elide headers and separate columns by tabs. * -i Translate SID to POSIX ID. * -n Print numeric ID instead of user/group name. * -o Control which fields to display. * -p Use exact (parsable) numeric output. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * -t Control which object types to display. * * Displays space consumed by, and quotas on, each user in the specified * filesystem or snapshot. */ /* us_field_types, us_field_hdr and us_field_names should be kept in sync */ enum us_field_types { USFIELD_TYPE, USFIELD_NAME, USFIELD_USED, USFIELD_QUOTA }; static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" }; static char *us_field_names[] = { "type", "name", "used", "quota" }; #define USFIELD_LAST (sizeof (us_field_names) / sizeof (char *)) #define USTYPE_PSX_GRP (1 << 0) #define USTYPE_PSX_USR (1 << 1) #define USTYPE_SMB_GRP (1 << 2) #define USTYPE_SMB_USR (1 << 3) #define USTYPE_ALL \ (USTYPE_PSX_GRP | USTYPE_PSX_USR | USTYPE_SMB_GRP | USTYPE_SMB_USR) static int us_type_bits[] = { USTYPE_PSX_GRP, USTYPE_PSX_USR, USTYPE_SMB_GRP, USTYPE_SMB_USR, USTYPE_ALL }; static char *us_type_names[] = { "posixgroup", "posixuser", "smbgroup", "smbuser", "all" }; typedef struct us_node { nvlist_t *usn_nvl; uu_avl_node_t usn_avlnode; uu_list_node_t usn_listnode; } us_node_t; typedef struct us_cbdata { nvlist_t **cb_nvlp; uu_avl_pool_t *cb_avl_pool; uu_avl_t *cb_avl; boolean_t cb_numname; boolean_t cb_nicenum; boolean_t cb_sid2posix; zfs_userquota_prop_t cb_prop; zfs_sort_column_t *cb_sortcol; size_t cb_width[USFIELD_LAST]; } us_cbdata_t; static boolean_t us_populated = B_FALSE; typedef struct { zfs_sort_column_t *si_sortcol; boolean_t si_numname; } us_sort_info_t; static int us_field_index(char *field) { int i; for (i = 0; i < USFIELD_LAST; i++) { if (strcmp(field, us_field_names[i]) == 0) return (i); } return (-1); } static int us_compare(const void *larg, const void *rarg, void *unused) { const us_node_t *l = larg; const us_node_t *r = rarg; us_sort_info_t *si = (us_sort_info_t *)unused; zfs_sort_column_t *sortcol = si->si_sortcol; boolean_t numname = si->si_numname; nvlist_t *lnvl = l->usn_nvl; nvlist_t *rnvl = r->usn_nvl; int rc = 0; boolean_t lvb, rvb; for (; sortcol != NULL; sortcol = sortcol->sc_next) { char *lvstr = ""; char *rvstr = ""; uint32_t lv32 = 0; uint32_t rv32 = 0; uint64_t lv64 = 0; uint64_t rv64 = 0; zfs_prop_t prop = sortcol->sc_prop; const char *propname = NULL; boolean_t reverse = sortcol->sc_reverse; switch (prop) { case ZFS_PROP_TYPE: propname = "type"; (void) nvlist_lookup_uint32(lnvl, propname, &lv32); (void) nvlist_lookup_uint32(rnvl, propname, &rv32); if (rv32 != lv32) rc = (rv32 < lv32) ? 1 : -1; break; case ZFS_PROP_NAME: propname = "name"; if (numname) { compare_nums: (void) nvlist_lookup_uint64(lnvl, propname, &lv64); (void) nvlist_lookup_uint64(rnvl, propname, &rv64); if (rv64 != lv64) rc = (rv64 < lv64) ? 1 : -1; } else { if ((nvlist_lookup_string(lnvl, propname, &lvstr) == ENOENT) || (nvlist_lookup_string(rnvl, propname, &rvstr) == ENOENT)) { goto compare_nums; } rc = strcmp(lvstr, rvstr); } break; case ZFS_PROP_USED: case ZFS_PROP_QUOTA: if (!us_populated) break; if (prop == ZFS_PROP_USED) propname = "used"; else propname = "quota"; (void) nvlist_lookup_uint64(lnvl, propname, &lv64); (void) nvlist_lookup_uint64(rnvl, propname, &rv64); if (rv64 != lv64) rc = (rv64 < lv64) ? 
1 : -1; break; default: break; } if (rc != 0) { if (rc < 0) return (reverse ? 1 : -1); else return (reverse ? -1 : 1); } } /* * If entries still seem to be the same, check if they are of the same * type (smbentity is added only if we are doing SID to POSIX ID * translation where we can have duplicate type/name combinations). */ if (nvlist_lookup_boolean_value(lnvl, "smbentity", &lvb) == 0 && nvlist_lookup_boolean_value(rnvl, "smbentity", &rvb) == 0 && lvb != rvb) return (lvb < rvb ? -1 : 1); return (0); } static inline const char * us_type2str(unsigned field_type) { switch (field_type) { case USTYPE_PSX_USR: return ("POSIX User"); case USTYPE_PSX_GRP: return ("POSIX Group"); case USTYPE_SMB_USR: return ("SMB User"); case USTYPE_SMB_GRP: return ("SMB Group"); default: return ("Undefined"); } } static int userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) { us_cbdata_t *cb = (us_cbdata_t *)arg; zfs_userquota_prop_t prop = cb->cb_prop; char *name = NULL; char *propname; char sizebuf[32]; us_node_t *node; uu_avl_pool_t *avl_pool = cb->cb_avl_pool; uu_avl_t *avl = cb->cb_avl; uu_avl_index_t idx; nvlist_t *props; us_node_t *n; zfs_sort_column_t *sortcol = cb->cb_sortcol; unsigned type = 0; const char *typestr; size_t namelen; size_t typelen; size_t sizelen; int typeidx, nameidx, sizeidx; us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; boolean_t smbentity = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); node = safe_malloc(sizeof (us_node_t)); uu_avl_node_init(node, &node->usn_avlnode, avl_pool); node->usn_nvl = props; if (domain != NULL && domain[0] != '\0') { /* SMB */ char sid[MAXNAMELEN + 32]; uid_t id; #ifdef illumos int err; int flag = IDMAP_REQ_FLG_USE_CACHE; #endif smbentity = B_TRUE; (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { type = USTYPE_SMB_GRP; #ifdef illumos err = sid_to_id(sid, B_FALSE, &id); #endif } else { type = USTYPE_SMB_USR; #ifdef illumos err = sid_to_id(sid, B_TRUE, &id); #endif } #ifdef illumos if (err == 0) { rid = id; if (!cb->cb_sid2posix) { if (type == USTYPE_SMB_USR) { (void) idmap_getwinnamebyuid(rid, flag, &name, NULL); } else { (void) idmap_getwinnamebygid(rid, flag, &name, NULL); } if (name == NULL) name = sid; } } #endif } if (cb->cb_sid2posix || domain == NULL || domain[0] == '\0') { /* POSIX or -i */ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { type = USTYPE_PSX_GRP; if (!cb->cb_numname) { struct group *g; if ((g = getgrgid(rid)) != NULL) name = g->gr_name; } } else { type = USTYPE_PSX_USR; if (!cb->cb_numname) { struct passwd *p; if ((p = getpwuid(rid)) != NULL) name = p->pw_name; } } } /* * Make sure that the type/name combination is unique when doing * SID to POSIX ID translation (hence changing the type from SMB to * POSIX). 
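 * The "smbentity" boolean recorded below lets us_compare() keep an SMB
 * entry distinct from the POSIX entry it was translated into when both
 * end up with the same type and name.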
*/ if (cb->cb_sid2posix && nvlist_add_boolean_value(props, "smbentity", smbentity) != 0) nomem(); /* Calculate/update width of TYPE field */ typestr = us_type2str(type); typelen = strlen(gettext(typestr)); typeidx = us_field_index("type"); if (typelen > cb->cb_width[typeidx]) cb->cb_width[typeidx] = typelen; if (nvlist_add_uint32(props, "type", type) != 0) nomem(); /* Calculate/update width of NAME field */ if ((cb->cb_numname && cb->cb_sid2posix) || name == NULL) { if (nvlist_add_uint64(props, "name", rid) != 0) nomem(); namelen = snprintf(NULL, 0, "%u", rid); } else { if (nvlist_add_string(props, "name", name) != 0) nomem(); namelen = strlen(name); } nameidx = us_field_index("name"); if (namelen > cb->cb_width[nameidx]) cb->cb_width[nameidx] = namelen; /* * Check if this type/name combination is in the list and update it; * otherwise add new node to the list. */ if ((n = uu_avl_find(avl, node, &sortinfo, &idx)) == NULL) { uu_avl_insert(avl, node, idx); } else { nvlist_free(props); free(node); node = n; props = node->usn_nvl; } /* Calculate/update width of USED/QUOTA fields */ if (cb->cb_nicenum) zfs_nicenum(space, sizebuf, sizeof (sizebuf)); else (void) snprintf(sizebuf, sizeof (sizebuf), "%llu", space); sizelen = strlen(sizebuf); if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED) { propname = "used"; if (!nvlist_exists(props, "quota")) (void) nvlist_add_uint64(props, "quota", 0); } else { propname = "quota"; if (!nvlist_exists(props, "used")) (void) nvlist_add_uint64(props, "used", 0); } sizeidx = us_field_index(propname); if (sizelen > cb->cb_width[sizeidx]) cb->cb_width[sizeidx] = sizelen; if (nvlist_add_uint64(props, propname, space) != 0) nomem(); return (0); } static void print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types, size_t *width, us_node_t *node) { nvlist_t *nvl = node->usn_nvl; char valstr[MAXNAMELEN]; boolean_t first = B_TRUE; int cfield = 0; int field; uint32_t ustype; /* Check type */ (void) nvlist_lookup_uint32(nvl, "type", &ustype); if (!(ustype & types)) return; while ((field = fields[cfield]) != USFIELD_LAST) { nvpair_t *nvp = NULL; data_type_t type; uint32_t val32; uint64_t val64; char *strval = NULL; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { if (strcmp(nvpair_name(nvp), us_field_names[field]) == 0) break; } type = nvpair_type(nvp); switch (type) { case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &val32); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &val64); break; case DATA_TYPE_STRING: (void) nvpair_value_string(nvp, &strval); break; default: (void) fprintf(stderr, "invalid data type\n"); } switch (field) { case USFIELD_TYPE: strval = (char *)us_type2str(val32); break; case USFIELD_NAME: if (type == DATA_TYPE_UINT64) { (void) sprintf(valstr, "%llu", val64); strval = valstr; } break; case USFIELD_USED: case USFIELD_QUOTA: if (type == DATA_TYPE_UINT64) { if (parsable) { (void) sprintf(valstr, "%llu", val64); } else { zfs_nicenum(val64, valstr, sizeof (valstr)); } if (field == USFIELD_QUOTA && strcmp(valstr, "0") == 0) strval = "none"; else strval = valstr; } break; } if (!first) { if (scripted) (void) printf("\t"); else (void) printf(" "); } if (scripted) (void) printf("%s", strval); else if (field == USFIELD_TYPE || field == USFIELD_NAME) (void) printf("%-*s", width[field], strval); else (void) printf("%*s", width[field], strval); first = B_FALSE; cfield++; } (void) printf("\n"); } static void print_us(boolean_t scripted, boolean_t parsable, int *fields, int types, size_t *width, boolean_t 
rmnode, uu_avl_t *avl) { us_node_t *node; const char *col; int cfield = 0; int field; if (!scripted) { boolean_t first = B_TRUE; while ((field = fields[cfield]) != USFIELD_LAST) { col = gettext(us_field_hdr[field]); if (field == USFIELD_TYPE || field == USFIELD_NAME) { (void) printf(first ? "%-*s" : " %-*s", width[field], col); } else { (void) printf(first ? "%*s" : " %*s", width[field], col); } first = B_FALSE; cfield++; } (void) printf("\n"); } for (node = uu_avl_first(avl); node; node = uu_avl_next(avl, node)) { print_us_node(scripted, parsable, fields, types, width, node); if (rmnode) nvlist_free(node->usn_nvl); } } static int zfs_do_userspace(int argc, char **argv) { zfs_handle_t *zhp; zfs_userquota_prop_t p; uu_avl_pool_t *avl_pool; uu_avl_t *avl_tree; uu_avl_walk_t *walk; char *delim; char deffields[] = "type,name,used,quota"; char *ofield = NULL; char *tfield = NULL; int cfield = 0; int fields[256]; int i; boolean_t scripted = B_FALSE; boolean_t prtnum = B_FALSE; boolean_t parsable = B_FALSE; boolean_t sid2posix = B_FALSE; int ret = 0; int c; zfs_sort_column_t *sortcol = NULL; int types = USTYPE_PSX_USR | USTYPE_SMB_USR; us_cbdata_t cb; us_node_t *node; us_node_t *rmnode; uu_list_pool_t *listpool; uu_list_t *list; uu_avl_index_t idx = 0; uu_list_index_t idx2 = 0; if (argc < 2) usage(B_FALSE); if (strcmp(argv[0], "groupspace") == 0) /* Toggle default group types */ types = USTYPE_PSX_GRP | USTYPE_SMB_GRP; while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { switch (c) { case 'n': prtnum = B_TRUE; break; case 'H': scripted = B_TRUE; break; case 'p': parsable = B_TRUE; break; case 'o': ofield = optarg; break; case 's': case 'S': if (zfs_add_sort_column(&sortcol, optarg, c == 's' ? B_FALSE : B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid field '%s'\n"), optarg); usage(B_FALSE); } break; case 't': tfield = optarg; break; case 'i': sid2posix = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset name\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* Use default output fields if not specified using -o */ if (ofield == NULL) ofield = deffields; do { if ((delim = strchr(ofield, ',')) != NULL) *delim = '\0'; if ((fields[cfield++] = us_field_index(ofield)) == -1) { (void) fprintf(stderr, gettext("invalid type '%s' " "for -o option\n"), ofield); return (-1); } if (delim != NULL) ofield = delim + 1; } while (delim != NULL); fields[cfield] = USFIELD_LAST; /* Override output types (-t option) */ if (tfield != NULL) { types = 0; do { boolean_t found = B_FALSE; if ((delim = strchr(tfield, ',')) != NULL) *delim = '\0'; for (i = 0; i < sizeof (us_type_bits) / sizeof (int); i++) { if (strcmp(tfield, us_type_names[i]) == 0) { found = B_TRUE; types |= us_type_bits[i]; break; } } if (!found) { (void) fprintf(stderr, gettext("invalid type " "'%s' for -t option\n"), tfield); return (-1); } if (delim != NULL) tfield = delim + 1; } while (delim != NULL); } if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) return (1); if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), offsetof(us_node_t, usn_avlnode), us_compare, UU_DEFAULT)) == NULL) nomem(); if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) nomem(); /* Always add 
default sorting columns */ (void) zfs_add_sort_column(&sortcol, "type", B_FALSE); (void) zfs_add_sort_column(&sortcol, "name", B_FALSE); cb.cb_sortcol = sortcol; cb.cb_numname = prtnum; cb.cb_nicenum = !parsable; cb.cb_avl_pool = avl_pool; cb.cb_avl = avl_tree; cb.cb_sid2posix = sid2posix; for (i = 0; i < USFIELD_LAST; i++) cb.cb_width[i] = strlen(gettext(us_field_hdr[i])); for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { if (((p == ZFS_PROP_USERUSED || p == ZFS_PROP_USERQUOTA) && !(types & (USTYPE_PSX_USR | USTYPE_SMB_USR))) || ((p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) && !(types & (USTYPE_PSX_GRP | USTYPE_SMB_GRP)))) continue; cb.cb_prop = p; if ((ret = zfs_userspace(zhp, p, userspace_cb, &cb)) != 0) return (ret); } /* Sort the list */ if ((node = uu_avl_first(avl_tree)) == NULL) return (0); us_populated = B_TRUE; listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), offsetof(us_node_t, usn_listnode), NULL, UU_DEFAULT); list = uu_list_create(listpool, NULL, UU_DEFAULT); uu_list_node_init(node, &node->usn_listnode, listpool); while (node != NULL) { rmnode = node; node = uu_avl_next(avl_tree, node); uu_avl_remove(avl_tree, rmnode); if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) uu_list_insert(list, rmnode, idx2); } for (node = uu_list_first(list); node != NULL; node = uu_list_next(list, node)) { us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == NULL) uu_avl_insert(avl_tree, node, idx); } uu_list_destroy(list); uu_list_pool_destroy(listpool); /* Print and free node nvlist memory */ print_us(scripted, parsable, fields, types, cb.cb_width, B_TRUE, cb.cb_avl); zfs_free_sort_columns(sortcol); /* Clean up the AVL tree */ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(cb.cb_avl, node); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(avl_tree); uu_avl_pool_destroy(avl_pool); return (ret); } /* * list [-Hp][-r|-d max] [-o property[,...]] [-s property] ... [-S property] ... * [-t type[,...]] [filesystem|volume|snapshot] ... * * -H Scripted mode; elide headers and separate columns by tabs. * -p Display values in parsable (literal) format. * -r Recurse over all children. * -d Limit recursion by depth. * -o Control which fields to display. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * -t Control which object types to display. * * When given no arguments, list all filesystems in the system. * Otherwise, list the specified datasets, optionally recursing down them if * '-r' is specified. */ typedef struct list_cbdata { boolean_t cb_first; boolean_t cb_literal; boolean_t cb_scripted; zprop_list_t *cb_proplist; } list_cbdata_t; /* * Given a list of columns to display, output appropriate headers for each one. 
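 * Right-justified property columns are padded to their pl_width; the final
 * column, when left-justified, is printed without trailing padding.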
*/ static void print_header(list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; char headerbuf[ZFS_MAXPROPLEN]; const char *header; int i; boolean_t first = B_TRUE; boolean_t right_justify; for (; pl != NULL; pl = pl->pl_next) { if (!first) { (void) printf(" "); } else { first = B_FALSE; } right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { header = zfs_prop_column_name(pl->pl_prop); right_justify = zfs_prop_align_right(pl->pl_prop); } else { for (i = 0; pl->pl_user_prop[i] != '\0'; i++) headerbuf[i] = toupper(pl->pl_user_prop[i]); headerbuf[i] = '\0'; header = headerbuf; } if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) (void) printf("%*s", pl->pl_width, header); else (void) printf("%-*s", pl->pl_width, header); } (void) printf("\n"); } /* * Given a dataset and a list of fields, print out all the properties according * to the described layout. */ static void print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; char property[ZFS_MAXPROPLEN]; nvlist_t *userprops = zfs_get_user_props(zhp); nvlist_t *propval; char *propstr; boolean_t right_justify; for (; pl != NULL; pl = pl->pl_next) { if (!first) { if (cb->cb_scripted) (void) printf("\t"); else (void) printf(" "); } else { first = B_FALSE; } if (pl->pl_prop == ZFS_PROP_NAME) { (void) strlcpy(property, zfs_get_name(zhp), sizeof (property)); propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, property, sizeof (property), NULL, NULL, 0, cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (zfs_prop_userquota(pl->pl_user_prop)) { if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, property, sizeof (property), cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; } else if (zfs_prop_written(pl->pl_user_prop)) { if (zfs_prop_get_written(zhp, pl->pl_user_prop, property, sizeof (property), cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; } else { if (nvlist_lookup_nvlist(userprops, pl->pl_user_prop, &propval) != 0) propstr = "-"; else verify(nvlist_lookup_string(propval, ZPROP_VALUE, &propstr) == 0); right_justify = B_FALSE; } /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) (void) printf("%*s", pl->pl_width, propstr); else (void) printf("%-*s", pl->pl_width, propstr); } (void) printf("\n"); } /* * Generic callback function to list a dataset or snapshot. 
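 * The header row is emitted once, before the first dataset, and is skipped
 * entirely in scripted (-H) mode.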
*/ static int list_callback(zfs_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; if (cbp->cb_first) { if (!cbp->cb_scripted) print_header(cbp); cbp->cb_first = B_FALSE; } print_dataset(zhp, cbp); return (0); } static int zfs_do_list(int argc, char **argv) { int c; static char default_fields[] = "name,used,available,referenced,mountpoint"; int types = ZFS_TYPE_DATASET; boolean_t types_specified = B_FALSE; char *fields = NULL; list_cbdata_t cb = { 0 }; char *value; int limit = 0; int ret = 0; zfs_sort_column_t *sortcol = NULL; int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, "HS:d:o:prs:t:")) != -1) { switch (c) { case 'o': fields = optarg; break; case 'p': cb.cb_literal = B_TRUE; flags |= ZFS_ITER_LITERAL_PROPS; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': cb.cb_scripted = B_TRUE; break; case 's': if (zfs_add_sort_column(&sortcol, optarg, B_FALSE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 'S': if (zfs_add_sort_column(&sortcol, optarg, B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 't': types = 0; types_specified = B_TRUE; flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", "volume", "snapshot", "snap", "bookmark", "all", NULL }; switch (getsubopt(&optarg, type_subopts, &value)) { case 0: types |= ZFS_TYPE_FILESYSTEM; break; case 1: types |= ZFS_TYPE_VOLUME; break; case 2: case 3: types |= ZFS_TYPE_SNAPSHOT; break; case 4: types |= ZFS_TYPE_BOOKMARK; break; case 5: types = ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK; break; default: (void) fprintf(stderr, gettext("invalid type '%s'\n"), suboptarg); usage(B_FALSE); } } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (fields == NULL) fields = default_fields; /* * If we are only going to list snapshot names and sort by name, * then we can use faster version. */ if (strcmp(fields, "name") == 0 && zfs_sort_only_by_name(sortcol)) flags |= ZFS_ITER_SIMPLE; /* * If "-o space" and no types were specified, don't display snapshots. */ if (strcmp(fields, "space") == 0 && types_specified == B_FALSE) types &= ~ZFS_TYPE_SNAPSHOT; /* * If the user specifies '-o all', the zprop_get_list() doesn't * normally include the name of the dataset. For 'zfs list', we always * want this property to be first. */ if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) usage(B_FALSE); cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, limit, list_callback, &cb); zprop_free_list(cb.cb_proplist); zfs_free_sort_columns(sortcol); if (ret == 0 && cb.cb_first && !cb.cb_scripted) (void) printf(gettext("no datasets available\n")); return (ret); } /* * zfs rename [-f] * zfs rename [-f] -p * zfs rename -r * zfs rename -u [-p] * * Renames the given dataset to another of the same type. * * The '-p' flag creates all the non-existing ancestors of the target first. 
*/ /* ARGSUSED */ static int zfs_do_rename(int argc, char **argv) { zfs_handle_t *zhp; renameflags_t flags = { 0 }; int c; int ret = 0; int types; boolean_t parents = B_FALSE; char *snapshot = NULL; /* check options */ while ((c = getopt(argc, argv, "fpru")) != -1) { switch (c) { case 'p': parents = B_TRUE; break; case 'r': flags.recurse = B_TRUE; break; case 'u': flags.nounmount = B_TRUE; break; case 'f': flags.forceunmount = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (flags.recurse && parents) { (void) fprintf(stderr, gettext("-p and -r options are mutually " "exclusive\n")); usage(B_FALSE); } if (flags.recurse && strchr(argv[0], '@') == 0) { (void) fprintf(stderr, gettext("source dataset for recursive " "rename must be a snapshot\n")); usage(B_FALSE); } if (flags.nounmount && parents) { (void) fprintf(stderr, gettext("-u and -p options are mutually " "exclusive\n")); usage(B_FALSE); } if (flags.nounmount) types = ZFS_TYPE_FILESYSTEM; else if (parents) types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; else types = ZFS_TYPE_DATASET; if (flags.recurse) { /* * When we do recursive rename we are fine when the given * snapshot for the given dataset doesn't exist - it can * still exists below. */ snapshot = strchr(argv[0], '@'); assert(snapshot != NULL); *snapshot = '\0'; snapshot++; } if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) return (1); /* If we were asked and the name looks good, try to create ancestors. */ if (parents && zfs_name_valid(argv[1], zfs_get_type(zhp)) && zfs_create_ancestors(g_zfs, argv[1]) != 0) { zfs_close(zhp); return (1); } ret = (zfs_rename(zhp, snapshot, argv[1], flags) != 0); zfs_close(zhp); return (ret); } /* * zfs promote * * Promotes the given clone fs to be the parent */ /* ARGSUSED */ static int zfs_do_promote(int argc, char **argv) { zfs_handle_t *zhp; int ret = 0; /* check options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing clone filesystem" " argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); ret = (zfs_promote(zhp) != 0); zfs_close(zhp); return (ret); } /* * zfs rollback [-rRf] * * -r Delete any intervening snapshots before doing rollback * -R Delete any snapshots and their clones * -f ignored for backwards compatability * * Given a filesystem, rollback to a specific snapshot, discarding any changes * since then and making it the active dataset. If more recent snapshots exist, * the command will complain unless the '-r' flag is given. 
*/ typedef struct rollback_cbdata { uint64_t cb_create; boolean_t cb_first; int cb_doclones; char *cb_target; int cb_error; boolean_t cb_recurse; } rollback_cbdata_t; static int rollback_check_dependent(zfs_handle_t *zhp, void *data) { rollback_cbdata_t *cbp = data; if (cbp->cb_first && cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot rollback to " "'%s': clones of previous snapshots exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-R' to " "force deletion of the following clones and " "dependents:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); zfs_close(zhp); return (0); } /* * Report any snapshots more recent than the one specified. Used when '-r' is * not specified. We reuse this same callback for the snapshot dependents - if * 'cb_dependent' is set, then this is a dependent and we should report it * without checking the transaction group. */ static int rollback_check(zfs_handle_t *zhp, void *data) { rollback_cbdata_t *cbp = data; if (cbp->cb_doclones) { zfs_close(zhp); return (0); } if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { if (cbp->cb_first && !cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot " "rollback to '%s': more recent snapshots " "or bookmarks exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-r' to " "force deletion of the following " "snapshots and bookmarks:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } if (cbp->cb_recurse) { if (zfs_iter_dependents(zhp, B_TRUE, rollback_check_dependent, cbp) != 0) { zfs_close(zhp); return (-1); } } else { (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } } zfs_close(zhp); return (0); } static int zfs_do_rollback(int argc, char **argv) { int ret = 0; int c; boolean_t force = B_FALSE; rollback_cbdata_t cb = { 0 }; zfs_handle_t *zhp, *snap; char parentname[ZFS_MAX_DATASET_NAME_LEN]; char *delim; /* check options */ while ((c = getopt(argc, argv, "rRf")) != -1) { switch (c) { case 'r': cb.cb_recurse = 1; break; case 'R': cb.cb_recurse = 1; cb.cb_doclones = 1; break; case 'f': force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* open the snapshot */ if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); /* open the parent dataset */ (void) strlcpy(parentname, argv[0], sizeof (parentname)); verify((delim = strrchr(parentname, '@')) != NULL); *delim = '\0'; if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_DATASET)) == NULL) { zfs_close(snap); return (1); } /* * Check for more recent snapshots and/or clones based on the presence * of '-r' and '-R'. */ cb.cb_target = argv[0]; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); cb.cb_first = B_TRUE; cb.cb_error = 0; if ((ret = zfs_iter_snapshots(zhp, B_FALSE, rollback_check, &cb)) != 0) goto out; if ((ret = zfs_iter_bookmarks(zhp, rollback_check, &cb)) != 0) goto out; if ((ret = cb.cb_error) != 0) goto out; /* * Rollback parent to the given snapshot. */ ret = zfs_rollback(zhp, snap, force); out: zfs_close(snap); zfs_close(zhp); if (ret == 0) return (0); else return (1); } /* * zfs set property=value ... { fs | snap | vol } ... * * Sets the given properties for all datasets specified on the command line. 
*/ static int set_callback(zfs_handle_t *zhp, void *data) { nvlist_t *props = data; if (zfs_prop_set_list(zhp, props) != 0) { switch (libzfs_errno(g_zfs)) { case EZFS_MOUNTFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to remount filesystem\n")); break; case EZFS_SHARENFSFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to reshare filesystem\n")); break; } return (1); } return (0); } static int zfs_do_set(int argc, char **argv) { nvlist_t *props = NULL; int ds_start = -1; /* argv idx of first dataset arg */ int ret = 0; /* check for options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing arguments\n")); usage(B_FALSE); } if (argc < 3) { if (strchr(argv[1], '=') == NULL) { (void) fprintf(stderr, gettext("missing property=value " "argument(s)\n")); } else { (void) fprintf(stderr, gettext("missing dataset " "name(s)\n")); } usage(B_FALSE); } /* validate argument order: prop=val args followed by dataset args */ for (int i = 1; i < argc; i++) { if (strchr(argv[i], '=') != NULL) { if (ds_start > 0) { /* out-of-order prop=val argument */ (void) fprintf(stderr, gettext("invalid " "argument order\n"), i); usage(B_FALSE); } } else if (ds_start < 0) { ds_start = i; } } if (ds_start < 0) { (void) fprintf(stderr, gettext("missing dataset name(s)\n")); usage(B_FALSE); } /* Populate a list of property settings */ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); for (int i = 1; i < ds_start; i++) { if ((ret = parseprop(props, argv[i])) != 0) goto error; } ret = zfs_for_each(argc - ds_start, argv + ds_start, 0, ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, props); error: nvlist_free(props); return (ret); } typedef struct snap_cbdata { nvlist_t *sd_nvl; boolean_t sd_recursive; const char *sd_snapname; } snap_cbdata_t; static int zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) { snap_cbdata_t *sd = arg; char *name; int rv = 0; int error; if (sd->sd_recursive && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) != 0) { zfs_close(zhp); return (0); } error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname); if (error == -1) nomem(); fnvlist_add_boolean(sd->sd_nvl, name); free(name); if (sd->sd_recursive) rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); zfs_close(zhp); return (rv); } /* * zfs snapshot [-r] [-o prop=value] ... * * Creates a snapshot with the given name. While functionally equivalent to * 'zfs create', it is a separate command to differentiate intent. 
*/ static int zfs_do_snapshot(int argc, char **argv) { int ret = 0; int c; nvlist_t *props; snap_cbdata_t sd = { 0 }; boolean_t multiple_snaps = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, "ro:")) != -1) { switch (c) { case 'o': if (parseprop(props, optarg) != 0) return (1); break; case 'r': sd.sd_recursive = B_TRUE; multiple_snaps = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } if (argc > 1) multiple_snaps = B_TRUE; for (; argc > 0; argc--, argv++) { char *atp; zfs_handle_t *zhp; atp = strchr(argv[0], '@'); if (atp == NULL) goto usage; *atp = '\0'; sd.sd_snapname = atp + 1; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) goto usage; if (zfs_snapshot_cb(zhp, &sd) != 0) goto usage; } ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); nvlist_free(sd.sd_nvl); nvlist_free(props); if (ret != 0 && multiple_snaps) (void) fprintf(stderr, gettext("no snapshots were created\n")); return (ret != 0); usage: nvlist_free(sd.sd_nvl); nvlist_free(props); usage(B_FALSE); return (-1); } /* * Send a backup stream to stdout. */ static int zfs_do_send(int argc, char **argv) { char *fromname = NULL; char *toname = NULL; char *resume_token = NULL; char *cp; zfs_handle_t *zhp; sendflags_t flags = { 0 }; int c, err; nvlist_t *dbgnv = NULL; boolean_t extraverbose = B_FALSE; struct option long_options[] = { {"replicate", no_argument, NULL, 'R'}, {"props", no_argument, NULL, 'p'}, {"parsable", no_argument, NULL, 'P'}, {"dedup", no_argument, NULL, 'D'}, {"verbose", no_argument, NULL, 'v'}, {"dryrun", no_argument, NULL, 'n'}, {"large-block", no_argument, NULL, 'L'}, {"embed", no_argument, NULL, 'e'}, {"resume", required_argument, NULL, 't'}, {"compressed", no_argument, NULL, 'c'}, {0, 0, 0, 0} }; /* check options */ while ((c = getopt_long(argc, argv, ":i:I:RbDpVvnPLet:c", long_options, NULL)) != -1) { switch (c) { case 'i': if (fromname) usage(B_FALSE); fromname = optarg; break; case 'I': if (fromname) usage(B_FALSE); fromname = optarg; flags.doall = B_TRUE; break; case 'R': flags.replicate = B_TRUE; break; case 'p': flags.props = B_TRUE; break; case 'P': flags.parsable = B_TRUE; flags.verbose = B_TRUE; break; case 'V': flags.progress = B_TRUE; flags.progressastitle = B_TRUE; break; case 'v': if (flags.verbose) extraverbose = B_TRUE; flags.verbose = B_TRUE; flags.progress = B_TRUE; break; case 'D': flags.dedup = B_TRUE; break; case 'n': flags.dryrun = B_TRUE; break; case 'L': flags.largeblock = B_TRUE; break; case 'e': flags.embed_data = B_TRUE; break; case 't': resume_token = optarg; break; case 'c': flags.compress = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': /*FALLTHROUGH*/ default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (resume_token != NULL) { if (fromname != NULL || flags.replicate || flags.props || flags.dedup) { (void) fprintf(stderr, gettext("invalid flags combined with -t\n")); usage(B_FALSE); } if (argc != 0) { (void) fprintf(stderr, gettext("no additional " "arguments are permitted with -t\n")); usage(B_FALSE); } } else { if (argc < 1) { (void) 
fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } } if (!flags.dryrun && isatty(STDOUT_FILENO)) { (void) fprintf(stderr, gettext("Error: Stream can not be written to a terminal.\n" "You must redirect standard output.\n")); return (1); } if (resume_token != NULL) { return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO, resume_token)); } /* * Special case sending a filesystem, or from a bookmark. */ if (strchr(argv[0], '@') == NULL || (fromname && strchr(fromname, '#') != NULL)) { char frombuf[ZFS_MAX_DATASET_NAME_LEN]; - enum lzc_send_flags lzc_flags = 0; if (flags.replicate || flags.doall || flags.props || - flags.dedup || flags.dryrun || flags.verbose || - flags.progress) { - (void) fprintf(stderr, - gettext("Error: " + flags.dedup || (strchr(argv[0], '@') == NULL && + (flags.dryrun || flags.verbose || flags.progress))) { + (void) fprintf(stderr, gettext("Error: " "Unsupported flag with filesystem or bookmark.\n")); return (1); } zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET); if (zhp == NULL) return (1); - if (flags.largeblock) - lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; - if (flags.embed_data) - lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; - if (flags.compress) - lzc_flags |= LZC_SEND_FLAG_COMPRESS; - if (fromname != NULL && (fromname[0] == '#' || fromname[0] == '@')) { /* * Incremental source name begins with # or @. * Default to same fs as target. */ (void) strncpy(frombuf, argv[0], sizeof (frombuf)); cp = strchr(frombuf, '@'); if (cp != NULL) *cp = '\0'; (void) strlcat(frombuf, fromname, sizeof (frombuf)); fromname = frombuf; } - err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags); + err = zfs_send_one(zhp, fromname, STDOUT_FILENO, flags); zfs_close(zhp); return (err != 0); } cp = strchr(argv[0], '@'); *cp = '\0'; toname = cp + 1; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); /* * If they specified the full path to the snapshot, chop off * everything except the short name of the snapshot, but special * case if they specify the origin. */ if (fromname && (cp = strchr(fromname, '@')) != NULL) { char origin[ZFS_MAX_DATASET_NAME_LEN]; zprop_source_t src; (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), &src, NULL, 0, B_FALSE); if (strcmp(origin, fromname) == 0) { fromname = NULL; flags.fromorigin = B_TRUE; } else { *cp = '\0'; if (cp != fromname && strcmp(argv[0], fromname)) { (void) fprintf(stderr, gettext("incremental source must be " "in same filesystem\n")); usage(B_FALSE); } fromname = cp + 1; if (strchr(fromname, '@') || strchr(fromname, '/')) { (void) fprintf(stderr, gettext("invalid incremental source\n")); usage(B_FALSE); } } } if (flags.replicate && fromname == NULL) flags.doall = B_TRUE; err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0, extraverbose ? &dbgnv : NULL); if (extraverbose && dbgnv != NULL) { /* * dump_nvlist prints to stdout, but that's been * redirected to a file. Make it print to stderr * instead. */ (void) dup2(STDERR_FILENO, STDOUT_FILENO); dump_nvlist(dbgnv, 0); nvlist_free(dbgnv); } zfs_close(zhp); return (err != 0); } /* * Restore a backup stream from stdin. 
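 * Typically used as the receiving end of a pipe, e.g. (illustrative):
 *	zfs send tank/data@snap | zfs receive -u backup/data
 * The stream cannot be read from a terminal, so stdin must be redirected.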
*/ static int zfs_do_receive(int argc, char **argv) { int c, err = 0; recvflags_t flags = { 0 }; boolean_t abort_resumable = B_FALSE; nvlist_t *props; nvpair_t *nvp = NULL; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, ":o:denuvFsA")) != -1) { switch (c) { case 'o': if (parseprop(props, optarg) != 0) return (1); break; case 'd': flags.isprefix = B_TRUE; break; case 'e': flags.isprefix = B_TRUE; flags.istail = B_TRUE; break; case 'n': flags.dryrun = B_TRUE; break; case 'u': flags.nomount = B_TRUE; break; case 'v': flags.verbose = B_TRUE; break; case 's': flags.resumable = B_TRUE; break; case 'F': flags.force = B_TRUE; break; case 'A': abort_resumable = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } while ((nvp = nvlist_next_nvpair(props, nvp))) { if (strcmp(nvpair_name(nvp), "origin") != 0) { (void) fprintf(stderr, gettext("invalid option")); usage(B_FALSE); } } if (abort_resumable) { if (flags.isprefix || flags.istail || flags.dryrun || flags.resumable || flags.nomount) { (void) fprintf(stderr, gettext("invalid option")); usage(B_FALSE); } char namebuf[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(namebuf, sizeof (namebuf), "%s/%%recv", argv[0]); if (zfs_dataset_exists(g_zfs, namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { zfs_handle_t *zhp = zfs_open(g_zfs, namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); err = zfs_destroy(zhp, B_FALSE); } else { zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) usage(B_FALSE); if (!zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) || zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0, NULL, NULL, 0, B_TRUE) == -1) { (void) fprintf(stderr, gettext("'%s' does not have any " "resumable receive state to abort\n"), argv[0]); return (1); } err = zfs_destroy(zhp, B_FALSE); } return (err != 0); } if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " "from a terminal.\n" "You must redirect standard input.\n")); return (1); } err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL); return (err != 0); } /* * allow/unallow stuff */ /* copied from zfs/sys/dsl_deleg.h */ #define ZFS_DELEG_PERM_CREATE "create" #define ZFS_DELEG_PERM_DESTROY "destroy" #define ZFS_DELEG_PERM_SNAPSHOT "snapshot" #define ZFS_DELEG_PERM_ROLLBACK "rollback" #define ZFS_DELEG_PERM_CLONE "clone" #define ZFS_DELEG_PERM_PROMOTE "promote" #define ZFS_DELEG_PERM_RENAME "rename" #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_ALLOW "allow" #define ZFS_DELEG_PERM_USERPROP "userprop" #define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? 
*/ #define ZFS_DELEG_PERM_USERQUOTA "userquota" #define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" #define ZFS_DELEG_PERM_USERUSED "userused" #define ZFS_DELEG_PERM_GROUPUSED "groupused" #define ZFS_DELEG_PERM_HOLD "hold" #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" #define ZFS_DELEG_PERM_REMAP "remap" #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, { ZFS_DELEG_PERM_REMAP, ZFS_DELEG_NOTE_REMAP }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, { NULL, ZFS_DELEG_NOTE_NONE } }; /* permission structure */ typedef struct deleg_perm { zfs_deleg_who_type_t dp_who_type; const char *dp_name; boolean_t dp_local; boolean_t dp_descend; } deleg_perm_t; /* */ typedef struct deleg_perm_node { deleg_perm_t dpn_perm; uu_avl_node_t dpn_avl_node; } deleg_perm_node_t; typedef struct fs_perm fs_perm_t; /* permissions set */ typedef struct who_perm { zfs_deleg_who_type_t who_type; const char *who_name; /* id */ char who_ug_name[256]; /* user/group name */ fs_perm_t *who_fsperm; /* uplink */ uu_avl_t *who_deleg_perm_avl; /* permissions */ } who_perm_t; /* */ typedef struct who_perm_node { who_perm_t who_perm; uu_avl_node_t who_avl_node; } who_perm_node_t; typedef struct fs_perm_set fs_perm_set_t; /* fs permissions */ struct fs_perm { const char *fsp_name; uu_avl_t *fsp_sc_avl; /* sets,create */ uu_avl_t *fsp_uge_avl; /* user,group,everyone */ fs_perm_set_t *fsp_set; /* uplink */ }; /* */ typedef struct fs_perm_node { fs_perm_t fspn_fsperm; uu_avl_t *fspn_avl; uu_list_node_t fspn_list_node; } fs_perm_node_t; /* top level structure */ struct fs_perm_set { uu_list_pool_t *fsps_list_pool; uu_list_t *fsps_list; /* list of fs_perms */ uu_avl_pool_t *fsps_named_set_avl_pool; uu_avl_pool_t *fsps_who_perm_avl_pool; uu_avl_pool_t *fsps_deleg_perm_avl_pool; }; static inline const char * deleg_perm_type(zfs_deleg_note_t note) { /* subcommands */ switch (note) { /* SUBCOMMANDS */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: case ZFS_DELEG_NOTE_GROUPUSED: case ZFS_DELEG_NOTE_USERPROP: case ZFS_DELEG_NOTE_USERQUOTA: case ZFS_DELEG_NOTE_USERUSED: /* other */ return (gettext("other")); default: return (gettext("subcommand")); } } static int who_type2weight(zfs_deleg_who_type_t who_type) { int res; switch (who_type) { case ZFS_DELEG_NAMED_SET_SETS: case ZFS_DELEG_NAMED_SET: res = 0; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: res = 1; break; case ZFS_DELEG_USER_SETS: case 
ZFS_DELEG_USER: res = 2; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: res = 3; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: res = 4; break; default: res = -1; } return (res); } /* ARGSUSED */ static int who_perm_compare(const void *larg, const void *rarg, void *unused) { const who_perm_node_t *l = larg; const who_perm_node_t *r = rarg; zfs_deleg_who_type_t ltype = l->who_perm.who_type; zfs_deleg_who_type_t rtype = r->who_perm.who_type; int lweight = who_type2weight(ltype); int rweight = who_type2weight(rtype); int res = lweight - rweight; if (res == 0) res = strncmp(l->who_perm.who_name, r->who_perm.who_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } /* ARGSUSED */ static int deleg_perm_compare(const void *larg, const void *rarg, void *unused) { const deleg_perm_node_t *l = larg; const deleg_perm_node_t *r = rarg; int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } static inline void fs_perm_set_init(fs_perm_set_t *fspset) { bzero(fspset, sizeof (fs_perm_set_t)); if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT)) == NULL) nomem(); } static inline void fs_perm_fini(fs_perm_t *); static inline void who_perm_fini(who_perm_t *); static inline void fs_perm_set_fini(fs_perm_set_t *fspset) { fs_perm_node_t *node = uu_list_first(fspset->fsps_list); while (node != NULL) { fs_perm_node_t *next_node = uu_list_next(fspset->fsps_list, node); fs_perm_t *fsperm = &node->fspn_fsperm; fs_perm_fini(fsperm); uu_list_remove(fspset->fsps_list, node); free(node); node = next_node; } uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); } static inline void deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, const char *name) { deleg_perm->dp_who_type = type; deleg_perm->dp_name = name; } static inline void who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, zfs_deleg_who_type_t type, const char *name) { uu_avl_pool_t *pool; pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; bzero(who_perm, sizeof (who_perm_t)); if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) nomem(); who_perm->who_type = type; who_perm->who_name = name; who_perm->who_fsperm = fsperm; } static inline void who_perm_fini(who_perm_t *who_perm) { deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); while (node != NULL) { deleg_perm_node_t *next_node = uu_avl_next(who_perm->who_deleg_perm_avl, node); uu_avl_remove(who_perm->who_deleg_perm_avl, node); free(node); node = next_node; } 
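	/* All delegated-permission nodes have been freed; tear down the AVL itself. */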
uu_avl_destroy(who_perm->who_deleg_perm_avl); } static inline void fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) { uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; bzero(fsperm, sizeof (fs_perm_t)); if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) == NULL) nomem(); fsperm->fsp_set = fspset; fsperm->fsp_name = fsname; } static inline void fs_perm_fini(fs_perm_t *fsperm) { who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_sc_avl, node); free(node); node = next_node; } node = uu_avl_first(fsperm->fsp_uge_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_uge_avl, node); free(node); node = next_node; } uu_avl_destroy(fsperm->fsp_sc_avl); uu_avl_destroy(fsperm->fsp_uge_avl); } static void set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, zfs_deleg_who_type_t who_type, const char *name, char locality) { uu_avl_index_t idx = 0; deleg_perm_node_t *found_node = NULL; deleg_perm_t *deleg_perm = &node->dpn_perm; deleg_perm_init(deleg_perm, who_type, name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) uu_avl_insert(avl, node, idx); else { node = found_node; deleg_perm = &node->dpn_perm; } switch (locality) { case ZFS_DELEG_LOCAL: deleg_perm->dp_local = B_TRUE; break; case ZFS_DELEG_DESCENDENT: deleg_perm->dp_descend = B_TRUE; break; case ZFS_DELEG_NA: break; default: assert(B_FALSE); /* invalid locality */ } } static inline int parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) { nvpair_t *nvp = NULL; fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; uu_avl_t *avl = who_perm->who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_perm->who_type; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *name = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; deleg_perm_node_t *node = safe_malloc(sizeof (deleg_perm_node_t)); assert(type == DATA_TYPE_BOOLEAN); uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); set_deleg_perm_node(avl, node, who_type, name, locality); } return (0); } static inline int parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) { nvpair_t *nvp = NULL; fs_perm_set_t *fspset = fsperm->fsp_set; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *name = nvpair_name(nvp); uu_avl_t *avl = NULL; uu_avl_pool_t *avl_pool = NULL; zfs_deleg_who_type_t perm_type = name[0]; char perm_locality = name[1]; const char *perm_name = name + 3; boolean_t is_set = B_TRUE; who_perm_t *who_perm = NULL; assert('$' == name[2]); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); switch (perm_type) { case ZFS_DELEG_CREATE: case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_NAMED_SET: case ZFS_DELEG_NAMED_SET_SETS: avl_pool = fspset->fsps_named_set_avl_pool; avl = fsperm->fsp_sc_avl; break; case ZFS_DELEG_USER: case ZFS_DELEG_USER_SETS: case ZFS_DELEG_GROUP: case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_EVERYONE: case ZFS_DELEG_EVERYONE_SETS: avl_pool = fspset->fsps_who_perm_avl_pool; avl = fsperm->fsp_uge_avl; break; default: 
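			/* Any other who-type in the fsacl nvlist is a programming error. */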
assert(!"unhandled zfs_deleg_who_type_t"); } if (is_set) { who_perm_node_t *found_node = NULL; who_perm_node_t *node = safe_malloc( sizeof (who_perm_node_t)); who_perm = &node->who_perm; uu_avl_index_t idx = 0; uu_avl_node_init(node, &node->who_avl_node, avl_pool); who_perm_init(who_perm, fsperm, perm_type, perm_name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) { if (avl == fsperm->fsp_uge_avl) { uid_t rid = 0; struct passwd *p = NULL; struct group *g = NULL; const char *nice_name = NULL; switch (perm_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: rid = atoi(perm_name); p = getpwuid(rid); if (p) nice_name = p->pw_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: rid = atoi(perm_name); g = getgrgid(rid); if (g) nice_name = g->gr_name; break; default: break; } if (nice_name != NULL) (void) strlcpy( node->who_perm.who_ug_name, nice_name, 256); } uu_avl_insert(avl, node, idx); } else { node = found_node; who_perm = &node->who_perm; } } (void) parse_who_perm(who_perm, nvl2, perm_locality); } return (0); } static inline int parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) { nvpair_t *nvp = NULL; uu_avl_index_t idx = 0; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *fsname = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); fs_perm_t *fsperm = NULL; fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); if (node == NULL) nomem(); fsperm = &node->fspn_fsperm; assert(DATA_TYPE_NVLIST == type); uu_list_node_init(node, &node->fspn_list_node, fspset->fsps_list_pool); idx = uu_list_numnodes(fspset->fsps_list); fs_perm_init(fsperm, fspset, fsname); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); (void) parse_fs_perm(fsperm, nvl2); uu_list_insert(fspset->fsps_list, node, idx); } return (0); } static inline const char * deleg_perm_comment(zfs_deleg_note_t note) { const char *str = ""; /* subcommands */ switch (note) { /* SUBCOMMANDS */ case ZFS_DELEG_NOTE_ALLOW: str = gettext("Must also have the permission that is being" "\n\t\t\t\tallowed"); break; case ZFS_DELEG_NOTE_CLONE: str = gettext("Must also have the 'create' ability and 'mount'" "\n\t\t\t\tability in the origin file system"); break; case ZFS_DELEG_NOTE_CREATE: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DESTROY: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DIFF: str = gettext("Allows lookup of paths within a dataset;" "\n\t\t\t\tgiven an object number. 
Ordinary users need this" "\n\t\t\t\tin order to use zfs diff"); break; case ZFS_DELEG_NOTE_HOLD: str = gettext("Allows adding a user hold to a snapshot"); break; case ZFS_DELEG_NOTE_MOUNT: str = gettext("Allows mount/umount of ZFS datasets"); break; case ZFS_DELEG_NOTE_PROMOTE: str = gettext("Must also have the 'mount'\n\t\t\t\tand" " 'promote' ability in the origin file system"); break; case ZFS_DELEG_NOTE_RECEIVE: str = gettext("Must also have the 'mount' and 'create'" " ability"); break; case ZFS_DELEG_NOTE_RELEASE: str = gettext("Allows releasing a user hold which\n\t\t\t\t" "might destroy the snapshot"); break; case ZFS_DELEG_NOTE_RENAME: str = gettext("Must also have the 'mount' and 'create'" "\n\t\t\t\tability in the new parent"); break; case ZFS_DELEG_NOTE_ROLLBACK: str = gettext(""); break; case ZFS_DELEG_NOTE_SEND: str = gettext(""); break; case ZFS_DELEG_NOTE_SHARE: str = gettext("Allows sharing file systems over NFS or SMB" "\n\t\t\t\tprotocols"); break; case ZFS_DELEG_NOTE_SNAPSHOT: str = gettext(""); break; /* * case ZFS_DELEG_NOTE_VSCAN: * str = gettext(""); * break; */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: str = gettext("Allows accessing any groupquota@... property"); break; case ZFS_DELEG_NOTE_GROUPUSED: str = gettext("Allows reading any groupused@... property"); break; case ZFS_DELEG_NOTE_USERPROP: str = gettext("Allows changing any user property"); break; case ZFS_DELEG_NOTE_USERQUOTA: str = gettext("Allows accessing any userquota@... property"); break; case ZFS_DELEG_NOTE_USERUSED: str = gettext("Allows reading any userused@... property"); break; /* other */ default: str = ""; } return (str); } struct allow_opts { boolean_t local; boolean_t descend; boolean_t user; boolean_t group; boolean_t everyone; boolean_t create; boolean_t set; boolean_t recursive; /* unallow only */ boolean_t prt_usage; boolean_t prt_perms; char *who; char *perms; const char *dataset; }; static inline int prop_cmp(const void *a, const void *b) { const char *str1 = *(const char **)a; const char *str2 = *(const char **)b; return (strcmp(str1, str2)); } static void allow_usage(boolean_t un, boolean_t requested, const char *msg) { const char *opt_desc[] = { "-h", gettext("show this help message and exit"), "-l", gettext("set permission locally"), "-d", gettext("set permission for descents"), "-u", gettext("set permission for user"), "-g", gettext("set permission for group"), "-e", gettext("set permission for everyone"), "-c", gettext("set create time permission"), "-s", gettext("define permission set"), /* unallow only */ "-r", gettext("remove permissions recursively"), }; size_t unallow_size = sizeof (opt_desc) / sizeof (char *); size_t allow_size = unallow_size - 2; const char *props[ZFS_NUM_PROPS]; int i; size_t count = 0; FILE *fp = requested ? stdout : stderr; zprop_desc_t *pdtbl = zfs_prop_get_table(); const char *fmt = gettext("%-16s %-14s\t%s\n"); (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : HELP_ALLOW)); (void) fprintf(fp, gettext("Options:\n")); for (i = 0; i < (un ? 
unallow_size : allow_size); i++) { const char *opt = opt_desc[i++]; const char *optdsc = opt_desc[i]; (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); } (void) fprintf(fp, gettext("\nThe following permissions are " "supported:\n\n")); (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), gettext("NOTES")); for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; const char *perm_type = deleg_perm_type(perm_note); const char *perm_comment = deleg_perm_comment(perm_note); (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); } for (i = 0; i < ZFS_NUM_PROPS; i++) { zprop_desc_t *pd = &pdtbl[i]; if (pd->pd_visible != B_TRUE) continue; if (pd->pd_attr == PROP_READONLY) continue; props[count++] = pd->pd_name; } props[count] = NULL; qsort(props, count, sizeof (char *), prop_cmp); for (i = 0; i < count; i++) (void) fprintf(fp, fmt, props[i], gettext("property"), ""); if (msg != NULL) (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); exit(requested ? 0 : 2); } static inline const char * munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, char **permsp) { if (un && argc == expected_argc - 1) *permsp = NULL; else if (argc == expected_argc) *permsp = argv[argc - 2]; else allow_usage(un, B_FALSE, gettext("wrong number of parameters\n")); return (argv[argc - 1]); } static void parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) { int uge_sum = opts->user + opts->group + opts->everyone; int csuge_sum = opts->create + opts->set + uge_sum; int ldcsuge_sum = csuge_sum + opts->local + opts->descend; int all_sum = un ? ldcsuge_sum + opts->recursive : ldcsuge_sum; if (uge_sum > 1) allow_usage(un, B_FALSE, gettext("-u, -g, and -e are mutually exclusive\n")); if (opts->prt_usage) { if (argc == 0 && all_sum == 0) allow_usage(un, B_TRUE, NULL); else usage(B_FALSE); } if (opts->set) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -s\n")); opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); if (argv[0][0] != '@') allow_usage(un, B_FALSE, gettext("invalid set name: missing '@' prefix\n")); opts->who = argv[0]; } else if (opts->create) { if (ldcsuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -c\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (opts->everyone) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -e\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") == 0) { opts->everyone = B_TRUE; argc--; argv++; opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (argc == 1 && !un) { opts->prt_perms = B_TRUE; opts->dataset = argv[argc-1]; } else { opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); opts->who = argv[0]; } if (!opts->local && !opts->descend) { opts->local = B_TRUE; opts->descend = B_TRUE; } } static void store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, const char *who, char *perms, nvlist_t *top_nvl) { int i; char ld[2] = { '\0', '\0' }; char who_buf[MAXNAMELEN + 32]; char base_type = '\0'; char set_type = '\0'; nvlist_t *base_nvl = NULL; nvlist_t *set_nvl = NULL; nvlist_t *nvl; if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); switch (type) { case ZFS_DELEG_NAMED_SET_SETS: case 
ZFS_DELEG_NAMED_SET: set_type = ZFS_DELEG_NAMED_SET_SETS; base_type = ZFS_DELEG_NAMED_SET; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: set_type = ZFS_DELEG_CREATE_SETS; base_type = ZFS_DELEG_CREATE; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: set_type = ZFS_DELEG_USER_SETS; base_type = ZFS_DELEG_USER; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: set_type = ZFS_DELEG_GROUP_SETS; base_type = ZFS_DELEG_GROUP; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: set_type = ZFS_DELEG_EVERYONE_SETS; base_type = ZFS_DELEG_EVERYONE; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; default: assert(set_type != '\0' && base_type != '\0'); } if (perms != NULL) { char *curr = perms; char *end = curr + strlen(perms); while (curr < end) { char *delim = strchr(curr, ','); if (delim == NULL) delim = end; else *delim = '\0'; if (curr[0] == '@') nvl = set_nvl; else nvl = base_nvl; (void) nvlist_add_boolean(nvl, curr); if (delim != end) *delim = ','; curr = delim + 1; } for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (!nvlist_empty(base_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, base_nvl); } if (!nvlist_empty(set_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, set_nvl); } } } else { for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); } } } static int construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) { if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) nomem(); if (opts->set) { store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, opts->descend, opts->who, opts->perms, *nvlp); } else if (opts->create) { store_allow_perm(ZFS_DELEG_CREATE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else if (opts->everyone) { store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else { char *curr = opts->who; char *end = curr + strlen(curr); while (curr < end) { const char *who; zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN; char *endch; char *delim = strchr(curr, ','); char errbuf[256]; char id[64]; struct passwd *p = NULL; struct group *g = NULL; uid_t rid; if (delim == NULL) delim = end; else *delim = '\0'; rid = (uid_t)strtol(curr, &endch, 0); if (opts->user) { who_type = ZFS_DELEG_USER; if (*endch != '\0') p = getpwnam(curr); else p = getpwuid(rid); if (p != NULL) rid = p->pw_uid; else { (void) snprintf(errbuf, 256, gettext( "invalid user %s"), curr); allow_usage(un, B_TRUE, errbuf); } } else if (opts->group) { 
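			/* -g: a non-numeric argument is resolved as a group name, a numeric one as a GID. */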
who_type = ZFS_DELEG_GROUP; if (*endch != '\0') g = getgrnam(curr); else g = getgrgid(rid); if (g != NULL) rid = g->gr_gid; else { (void) snprintf(errbuf, 256, gettext( "invalid group %s"), curr); allow_usage(un, B_TRUE, errbuf); } } else { if (*endch != '\0') { p = getpwnam(curr); } else { p = getpwuid(rid); } if (p == NULL) { if (*endch != '\0') { g = getgrnam(curr); } else { g = getgrgid(rid); } } if (p != NULL) { who_type = ZFS_DELEG_USER; rid = p->pw_uid; } else if (g != NULL) { who_type = ZFS_DELEG_GROUP; rid = g->gr_gid; } else { (void) snprintf(errbuf, 256, gettext( "invalid user/group %s"), curr); allow_usage(un, B_TRUE, errbuf); } } (void) sprintf(id, "%u", rid); who = id; store_allow_perm(who_type, opts->local, opts->descend, who, opts->perms, *nvlp); curr = delim + 1; } } return (0); } static void print_set_creat_perms(uu_avl_t *who_avl) { const char *sc_title[] = { gettext("Permission sets:\n"), gettext("Create time permissions:\n"), NULL }; const char **title_ptr = sc_title; who_perm_node_t *who_node = NULL; int prev_weight = -1; for (who_node = uu_avl_first(who_avl); who_node != NULL; who_node = uu_avl_next(who_avl, who_node)) { uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; const char *who_name = who_node->who_perm.who_name; int weight = who_type2weight(who_type); boolean_t first = B_TRUE; deleg_perm_node_t *deleg_node; if (prev_weight != weight) { (void) printf(*title_ptr++); prev_weight = weight; } if (who_name == NULL || strnlen(who_name, 1) == 0) (void) printf("\t"); else (void) printf("\t%s ", who_name); for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node = uu_avl_next(avl, deleg_node)) { if (first) { (void) printf("%s", deleg_node->dpn_perm.dp_name); first = B_FALSE; } else (void) printf(",%s", deleg_node->dpn_perm.dp_name); } (void) printf("\n"); } } static void print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, const char *title) { who_perm_node_t *who_node = NULL; boolean_t prt_title = B_TRUE; uu_avl_walk_t *walk; if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((who_node = uu_avl_walk_next(walk)) != NULL) { const char *who_name = who_node->who_perm.who_name; const char *nice_who_name = who_node->who_perm.who_ug_name; uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; char delim = ' '; deleg_perm_node_t *deleg_node; boolean_t prt_who = B_TRUE; for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node = uu_avl_next(avl, deleg_node)) { if (local != deleg_node->dpn_perm.dp_local || descend != deleg_node->dpn_perm.dp_descend) continue; if (prt_who) { const char *who = NULL; if (prt_title) { prt_title = B_FALSE; (void) printf(title); } switch (who_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: who = gettext("user"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: who = gettext("group"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: who = gettext("everyone"); who_name = NULL; break; default: assert(who != NULL); } prt_who = B_FALSE; if (who_name == NULL) (void) printf("\t%s", who); else (void) printf("\t%s %s", who, who_name); } (void) printf("%c%s", delim, deleg_node->dpn_perm.dp_name); delim = ','; } if (!prt_who) (void) printf("\n"); } uu_avl_walk_end(walk); } static void print_fs_perms(fs_perm_set_t *fspset) { fs_perm_node_t *node = 
NULL; char buf[MAXNAMELEN + 32]; const char *dsname = buf; for (node = uu_list_first(fspset->fsps_list); node != NULL; node = uu_list_next(fspset->fsps_list, node)) { uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; int left = 0; (void) snprintf(buf, sizeof (buf), gettext("---- Permissions on %s "), node->fspn_fsperm.fsp_name); (void) printf(dsname); left = 70 - strlen(buf); while (left-- > 0) (void) printf("-"); (void) printf("\n"); print_set_creat_perms(sc_avl); print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, gettext("Local permissions:\n")); print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, gettext("Descendent permissions:\n")); print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, gettext("Local+Descendent permissions:\n")); } } static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; struct deleg_perms { boolean_t un; nvlist_t *nvl; }; static int set_deleg_perms(zfs_handle_t *zhp, void *data) { struct deleg_perms *perms = (struct deleg_perms *)data; zfs_type_t zfs_type = zfs_get_type(zhp); if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) return (0); return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); } static int zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) { zfs_handle_t *zhp; nvlist_t *perm_nvl = NULL; nvlist_t *update_perm_nvl = NULL; int error = 1; int c; struct allow_opts opts = { 0 }; const char *optstr = un ? "ldugecsrh" : "ldugecsh"; /* check opts */ while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'l': opts.local = B_TRUE; break; case 'd': opts.descend = B_TRUE; break; case 'u': opts.user = B_TRUE; break; case 'g': opts.group = B_TRUE; break; case 'e': opts.everyone = B_TRUE; break; case 's': opts.set = B_TRUE; break; case 'c': opts.create = B_TRUE; break; case 'r': opts.recursive = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'h': opts.prt_usage = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check arguments */ parse_allow_args(argc, argv, un, &opts); /* try to open the dataset */ if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { (void) fprintf(stderr, "Failed to open dataset: %s\n", opts.dataset); return (-1); } if (zfs_get_fsacl(zhp, &perm_nvl) != 0) goto cleanup2; fs_perm_set_init(&fs_perm_set); if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { (void) fprintf(stderr, "Failed to parse fsacl permissions\n"); goto cleanup1; } if (opts.prt_perms) print_fs_perms(&fs_perm_set); else { (void) construct_fsacl_list(un, &opts, &update_perm_nvl); if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) goto cleanup0; if (un && opts.recursive) { struct deleg_perms data = { un, update_perm_nvl }; if (zfs_iter_filesystems(zhp, set_deleg_perms, &data) != 0) goto cleanup0; } } error = 0; cleanup0: nvlist_free(perm_nvl); nvlist_free(update_perm_nvl); cleanup1: fs_perm_set_fini(&fs_perm_set); cleanup2: zfs_close(zhp); return (error); } static int zfs_do_allow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); } static int zfs_do_unallow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); } static int zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) { int errors = 0; int i; const char *tag; boolean_t recursive = B_FALSE; const char *opts = holding ? 
"rt" : "r"; int c; /* check options */ while ((c = getopt(argc, argv, opts)) != -1) { switch (c) { case 'r': recursive = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 2) usage(B_FALSE); tag = argv[0]; --argc; ++argv; if (holding && tag[0] == '.') { /* tags starting with '.' are reserved for libzfs */ (void) fprintf(stderr, gettext("tag may not start with '.'\n")); usage(B_FALSE); } for (i = 0; i < argc; ++i) { zfs_handle_t *zhp; char parent[ZFS_MAX_DATASET_NAME_LEN]; const char *delim; char *path = argv[i]; delim = strchr(path, '@'); if (delim == NULL) { (void) fprintf(stderr, gettext("'%s' is not a snapshot\n"), path); ++errors; continue; } (void) strncpy(parent, path, delim - path); parent[delim - path] = '\0'; zhp = zfs_open(g_zfs, parent, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) { ++errors; continue; } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) ++errors; } zfs_close(zhp); } return (errors != 0); } /* * zfs hold [-r] [-t] ... * * -r Recursively hold * * Apply a user-hold with the given tag to the list of snapshots. */ static int zfs_do_hold(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_TRUE)); } /* * zfs release [-r] ... * * -r Recursively release * * Release a user-hold with the given tag from the list of snapshots. */ static int zfs_do_release(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); } typedef struct holds_cbdata { boolean_t cb_recursive; const char *cb_snapname; nvlist_t **cb_nvlp; size_t cb_max_namelen; size_t cb_max_taglen; } holds_cbdata_t; #define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" #define DATETIME_BUF_LEN (32) /* * */ static void print_holds(boolean_t scripted, boolean_t literal, size_t nwidth, size_t tagwidth, nvlist_t *nvl) { int i; nvpair_t *nvp = NULL; char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; const char *col; if (!scripted) { for (i = 0; i < 3; i++) { col = gettext(hdr_cols[i]); if (i < 2) (void) printf("%-*s ", i ? tagwidth : nwidth, col); else (void) printf("%s\n", col); } } while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { char *zname = nvpair_name(nvp); nvlist_t *nvl2; nvpair_t *nvp2 = NULL; (void) nvpair_value_nvlist(nvp, &nvl2); while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { char tsbuf[DATETIME_BUF_LEN]; char *tagname = nvpair_name(nvp2); uint64_t val = 0; time_t time; struct tm t; (void) nvpair_value_uint64(nvp2, &val); if (literal) snprintf(tsbuf, DATETIME_BUF_LEN, "%llu", val); else { time = (time_t)val; (void) localtime_r(&time, &t); (void) strftime(tsbuf, DATETIME_BUF_LEN, gettext(STRFTIME_FMT_STR), &t); } if (scripted) { (void) printf("%s\t%s\t%s\n", zname, tagname, tsbuf); } else { (void) printf("%-*s %-*s %s\n", nwidth, zname, tagwidth, tagname, tsbuf); } } } } /* * Generic callback function to list a dataset or snapshot. 
*/ static int holds_callback(zfs_handle_t *zhp, void *data) { holds_cbdata_t *cbp = data; nvlist_t *top_nvl = *cbp->cb_nvlp; nvlist_t *nvl = NULL; nvpair_t *nvp = NULL; const char *zname = zfs_get_name(zhp); size_t znamelen = strlen(zname); if (cbp->cb_recursive && cbp->cb_snapname != NULL) { const char *snapname; char *delim = strchr(zname, '@'); if (delim == NULL) return (0); snapname = delim + 1; if (strcmp(cbp->cb_snapname, snapname)) return (0); } if (zfs_get_holds(zhp, &nvl) != 0) return (-1); if (znamelen > cbp->cb_max_namelen) cbp->cb_max_namelen = znamelen; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *tag = nvpair_name(nvp); size_t taglen = strlen(tag); if (taglen > cbp->cb_max_taglen) cbp->cb_max_taglen = taglen; } return (nvlist_add_nvlist(top_nvl, zname, nvl)); } /* * zfs holds [-Hp] [-r | -d max] <snap> ... * * -H Suppress header output * -p Output literal values * -r Recursively search for holds * -d max Limit depth of recursive search */ static int zfs_do_holds(int argc, char **argv) { int errors = 0; int c; int i; boolean_t scripted = B_FALSE; boolean_t literal = B_FALSE; boolean_t recursive = B_FALSE; const char *opts = "d:rHp"; nvlist_t *nvl; int types = ZFS_TYPE_SNAPSHOT; holds_cbdata_t cb = { 0 }; int limit = 0; int ret = 0; int flags = 0; /* check options */ while ((c = getopt(argc, argv, opts)) != -1) { switch (c) { case 'd': limit = parse_depth(optarg, &flags); recursive = B_TRUE; break; case 'r': recursive = B_TRUE; break; case 'H': scripted = B_TRUE; break; case 'p': literal = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (recursive) { types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; flags |= ZFS_ITER_RECURSE; } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) usage(B_FALSE); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); for (i = 0; i < argc; ++i) { char *snapshot = argv[i]; const char *delim; const char *snapname = NULL; delim = strchr(snapshot, '@'); if (delim != NULL) { snapname = delim + 1; if (recursive) snapshot[delim - snapshot] = '\0'; } cb.cb_recursive = recursive; cb.cb_snapname = snapname; cb.cb_nvlp = &nvl; /* * 1. collect holds data, set format options */ ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit, holds_callback, &cb); if (ret != 0) ++errors; } /* * 2. print holds data */ print_holds(scripted, literal, cb.cb_max_namelen, cb.cb_max_taglen, nvl); if (nvlist_empty(nvl)) (void) printf(gettext("no datasets available\n")); nvlist_free(nvl); return (0 != errors); } #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 1 /* seconds */ typedef struct get_all_state { boolean_t ga_verbose; get_all_cb_t *ga_cbp; } get_all_state_t; static int get_one_dataset(zfs_handle_t *zhp, void *data) { static char *spin[] = { "-", "\\", "|", "/" }; static int spinval = 0; static int spincheck = 0; static time_t last_spin_time = (time_t)0; get_all_state_t *state = data; zfs_type_t type = zfs_get_type(zhp); if (state->ga_verbose) { if (--spincheck < 0) { time_t now = time(NULL); if (last_spin_time + SPINNER_TIME < now) { update_progress(spin[spinval++ % 4]); last_spin_time = now; } spincheck = CHECK_SPINNER; } } /* * Iterate over any nested datasets. */ if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { zfs_close(zhp); return (1); } /* * Skip any datasets whose type does not match.
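* Only ZFS_TYPE_FILESYSTEM handles are added to the callback list; other dataset types are closed and skipped.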
*/ if ((type & ZFS_TYPE_FILESYSTEM) == 0) { zfs_close(zhp); return (0); } libzfs_add_handle(state->ga_cbp, zhp); assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc); return (0); } static void get_all_datasets(get_all_cb_t *cbp, boolean_t verbose) { get_all_state_t state = { .ga_verbose = verbose, .ga_cbp = cbp }; if (verbose) set_progress_header(gettext("Reading ZFS config")); (void) zfs_iter_root(g_zfs, get_one_dataset, &state); if (verbose) finish_progress(gettext("done.")); } /* * Generic callback for sharing or mounting filesystems. Because the code is so * similar, we have a common function with an extra parameter to determine which * mode we are using. */ typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t; typedef struct share_mount_state { share_mount_op_t sm_op; boolean_t sm_verbose; int sm_flags; char *sm_options; char *sm_proto; /* only valid for OP_SHARE */ pthread_mutex_t sm_lock; /* protects the remaining fields */ uint_t sm_total; /* number of filesystems to process */ uint_t sm_done; /* number of filesystems processed */ int sm_status; /* -1 if any of the share/mount operations failed */ } share_mount_state_t; /* * Share or mount a dataset. */ static int share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, boolean_t explicit, const char *options) { char mountpoint[ZFS_MAXPROPLEN]; char shareopts[ZFS_MAXPROPLEN]; char smbshareopts[ZFS_MAXPROPLEN]; const char *cmdname = op == OP_SHARE ? "share" : "mount"; struct mnttab mnt; uint64_t zoned, canmount; boolean_t shared_nfs, shared_smb; assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM); /* * Check to make sure we can mount/share this dataset. If we * are in the global zone and the filesystem is exported to a * local zone, or if we are in a local zone and the * filesystem is not exported, then it is an error. */ zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned && getzoneid() == GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "dataset is exported to a local zone\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "permission denied\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * Ignore any filesystems which don't apply to us. This * includes those with a legacy mountpoint, or those with * legacy share options. */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts, sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0); if (op == OP_SHARE && strcmp(shareopts, "off") == 0 && strcmp(smbshareopts, "off") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share '%s': " "legacy share\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("to " "share this filesystem set " "sharenfs property on\n")); return (1); } /* * We cannot share or mount legacy filesystems. If the * shareopts is non-legacy but the mountpoint is legacy, we * treat it as a legacy share. 
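* For example, a dataset with sharenfs=on but mountpoint=legacy is still treated as a legacy share and refused here.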
*/ if (strcmp(mountpoint, "legacy") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use %s(8) to " "%s this filesystem\n"), cmdname, cmdname); return (1); } if (strcmp(mountpoint, "none") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': no " "mountpoint set\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * canmount explicit outcome * on no pass through * on yes pass through * off no return 0 * off yes display error, return 1 * noauto no return 0 * noauto yes pass through */ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); if (canmount == ZFS_CANMOUNT_OFF) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "'canmount' property is set to 'off'\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) { return (0); } /* * If this filesystem is inconsistent and has a receive resume * token, we can not mount it. */ if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "Contains partially-completed state from " "\"zfs receive -r\", which can be resumed with " "\"zfs send -t\"\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. If the * filesystem is already mounted or shared, return (failing * for explicit requests); otherwise mount or share the * filesystem. */ switch (op) { case OP_SHARE: shared_nfs = zfs_is_shared_nfs(zhp, NULL); shared_smb = zfs_is_shared_smb(zhp, NULL); if ((shared_nfs && shared_smb) || (shared_nfs && strcmp(shareopts, "on") == 0 && strcmp(smbshareopts, "off") == 0) || (shared_smb && strcmp(smbshareopts, "on") == 0 && strcmp(shareopts, "off") == 0)) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share " "'%s': filesystem already shared\n"), zfs_get_name(zhp)); return (1); } if (!zfs_is_mounted(zhp, NULL) && zfs_mount(zhp, NULL, 0) != 0) return (1); if (protocol == NULL) { if (zfs_shareall(zhp) != 0) return (1); } else if (strcmp(protocol, "nfs") == 0) { if (zfs_share_nfs(zhp)) return (1); } else if (strcmp(protocol, "smb") == 0) { if (zfs_share_smb(zhp)) return (1); } else { (void) fprintf(stderr, gettext("cannot share " "'%s': invalid share type '%s' " "specified\n"), zfs_get_name(zhp), protocol); return (1); } break; case OP_MOUNT: if (options == NULL) mnt.mnt_mntopts = ""; else mnt.mnt_mntopts = (char *)options; if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && zfs_is_mounted(zhp, NULL)) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot mount " "'%s': filesystem already mounted\n"), zfs_get_name(zhp)); return (1); } if (zfs_mount(zhp, options, flags) != 0) return (1); break; } return (0); } /* * Reports progress in the form "(current/total)". Not thread-safe. 
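* For example, after the third of ten filesystems has been processed the meter reads "(3/10)".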
*/ static void report_mount_progress(int current, int total) { static time_t last_progress_time = 0; time_t now = time(NULL); char info[32]; /* display header if we're here for the first time */ if (current == 1) { set_progress_header(gettext("Mounting ZFS filesystems")); } else if (current != total && last_progress_time + MOUNT_TIME >= now) { /* too soon to report again */ return; } last_progress_time = now; (void) sprintf(info, "(%d/%d)", current, total); if (current == total) finish_progress(info); else update_progress(info); } /* * zfs_foreach_mountpoint() callback that mounts or shares on filesystem and * updates the progress meter */ static int share_mount_one_cb(zfs_handle_t *zhp, void *arg) { share_mount_state_t *sms = arg; int ret; ret = share_mount_one(zhp, sms->sm_op, sms->sm_flags, sms->sm_proto, B_FALSE, sms->sm_options); pthread_mutex_lock(&sms->sm_lock); if (ret != 0) sms->sm_status = ret; sms->sm_done++; if (sms->sm_verbose) report_mount_progress(sms->sm_done, sms->sm_total); pthread_mutex_unlock(&sms->sm_lock); return (ret); } static void append_options(char *mntopts, char *newopts) { int len = strlen(mntopts); /* original length plus new string to append plus 1 for the comma */ if (len + 1 + strlen(newopts) >= MNT_LINE_MAX) { (void) fprintf(stderr, gettext("the opts argument for " "'%c' option is too long (more than %d chars)\n"), "-o", MNT_LINE_MAX); usage(B_FALSE); } if (*mntopts) mntopts[len++] = ','; (void) strcpy(&mntopts[len], newopts); } static int share_mount(int op, int argc, char **argv) { int do_all = 0; boolean_t verbose = B_FALSE; int c, ret = 0; char *options = NULL; int flags = 0; /* check options */ while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'v': verbose = B_TRUE; break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " "options (-o) specified\n")); usage(B_FALSE); } if (options == NULL) options = safe_malloc(MNT_LINE_MAX + 1); /* option validation is done later */ append_options(options, optarg); break; case 'O': warnx("no overlay mounts support on FreeBSD, ignoring"); break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (do_all) { char *protocol = NULL; if (op == OP_SHARE && argc > 0) { if (strcmp(argv[0], "nfs") != 0 && strcmp(argv[0], "smb") != 0) { (void) fprintf(stderr, gettext("share type " "must be 'nfs' or 'smb'\n")); usage(B_FALSE); } protocol = argv[0]; argc--; argv++; } if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } start_progress_timer(); get_all_cb_t cb = { 0 }; get_all_datasets(&cb, verbose); if (cb.cb_used == 0) { if (options != NULL) free(options); return (0); } #ifdef illumos if (op == OP_SHARE) { sa_init_selective_arg_t sharearg; sharearg.zhandle_arr = cb.cb_handles; sharearg.zhandle_len = cb.cb_used; if ((ret = zfs_init_libshare_arg(g_zfs, SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != SA_OK) { (void) fprintf(stderr, gettext( "Could not initialize libshare, %d"), ret); return (ret); } } #endif share_mount_state_t share_mount_state = { 0 }; share_mount_state.sm_op = op; share_mount_state.sm_verbose = verbose; share_mount_state.sm_flags = flags; share_mount_state.sm_options = options; share_mount_state.sm_proto = protocol; share_mount_state.sm_total = cb.cb_used; 
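/* sm_lock protects sm_done and sm_status, which the per-filesystem callback updates while mounts may run in parallel. */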
pthread_mutex_init(&share_mount_state.sm_lock, NULL); /* * libshare isn't mt-safe, so only do the operation in parallel * if we're mounting. */ zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, share_mount_one_cb, &share_mount_state, op == OP_MOUNT); ret = share_mount_state.sm_status; for (int i = 0; i < cb.cb_used; i++) zfs_close(cb.cb_handles[i]); free(cb.cb_handles); } else if (argc == 0) { struct mnttab entry; if ((op == OP_SHARE) || (options != NULL)) { (void) fprintf(stderr, gettext("missing filesystem " "argument (specify -a for all)\n")); usage(B_FALSE); } /* * When mount is given no arguments, go through /etc/mnttab and * display any active ZFS mounts. We hide any snapshots, since * they are controlled automatically. */ rewind(mnttab_file); while (getmntent(mnttab_file, &entry) == 0) { if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || strchr(entry.mnt_special, '@') != NULL) continue; (void) printf("%-30s %s\n", entry.mnt_special, entry.mnt_mountp); } } else { zfs_handle_t *zhp; if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; } else { ret = share_mount_one(zhp, op, flags, NULL, B_TRUE, options); zfs_close(zhp); } } return (ret); } /* * zfs mount -a [nfs] * zfs mount filesystem * * Mount all filesystems, or mount the given filesystem. */ static int zfs_do_mount(int argc, char **argv) { return (share_mount(OP_MOUNT, argc, argv)); } /* * zfs share -a [nfs | smb] * zfs share filesystem * * Share all filesystems, or share the given filesystem. */ static int zfs_do_share(int argc, char **argv) { return (share_mount(OP_SHARE, argc, argv)); } typedef struct unshare_unmount_node { zfs_handle_t *un_zhp; char *un_mountp; uu_avl_node_t un_avlnode; } unshare_unmount_node_t; /* ARGSUSED */ static int unshare_unmount_compare(const void *larg, const void *rarg, void *unused) { const unshare_unmount_node_t *l = larg; const unshare_unmount_node_t *r = rarg; return (strcmp(l->un_mountp, r->un_mountp)); } /* * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an * absolute path, find the entry /etc/mnttab, verify that its a ZFS filesystem, * and unmount it appropriately. */ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) { zfs_handle_t *zhp; int ret = 0; struct stat64 statbuf; struct extmnttab entry; const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount"; ino_t path_inode; /* * Search for the path in /etc/mnttab. Rather than looking for the * specific path, which can be fooled by non-standard paths (i.e. ".." * or "//"), we stat() the path and search for the corresponding * (major,minor) device pair. */ if (stat64(path, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), cmdname, path, strerror(errno)); return (1); } path_inode = statbuf.st_ino; /* * Search for the given (major,minor) pair in the mount table. 
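* On illumos this walks mnttab_file with getextmntent(); on FreeBSD the entry is obtained from statfs(2) on the path and converted with statfs2mnttab().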
*/ #ifdef illumos rewind(mnttab_file); while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) { if (entry.mnt_major == major(statbuf.st_dev) && entry.mnt_minor == minor(statbuf.st_dev)) break; } #else { struct statfs sfs; if (statfs(path, &sfs) != 0) { (void) fprintf(stderr, "%s: %s\n", path, strerror(errno)); ret = -1; } statfs2mnttab(&sfs, &entry); } #endif if (ret != 0) { if (op == OP_SHARE) { (void) fprintf(stderr, gettext("cannot %s '%s': not " "currently mounted\n"), cmdname, path); return (1); } (void) fprintf(stderr, gettext("warning: %s not in mnttab\n"), path); if ((ret = umount2(path, flags)) != 0) (void) fprintf(stderr, gettext("%s: %s\n"), path, strerror(errno)); return (ret != 0); } if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS " "filesystem\n"), cmdname, path); return (1); } if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); ret = 1; if (stat64(entry.mnt_mountp, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), cmdname, path, strerror(errno)); goto out; } else if (statbuf.st_ino != path_inode) { (void) fprintf(stderr, gettext("cannot " "%s '%s': not a mountpoint\n"), cmdname, path); goto out; } if (op == OP_SHARE) { char nfs_mnt_prop[ZFS_MAXPROPLEN]; char smbshare_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshare_prop, sizeof (smbshare_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(smbshare_prop, "off") == 0) { (void) fprintf(stderr, gettext("cannot unshare " "'%s': legacy share\n"), path); #ifdef illumos (void) fprintf(stderr, gettext("use " "unshare(1M) to unshare this filesystem\n")); #endif } else if (!zfs_is_shared(zhp)) { (void) fprintf(stderr, gettext("cannot unshare '%s': " "not currently shared\n"), path); } else { ret = zfs_unshareall_bypath(zhp, path); } } else { char mtpt_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mtpt_prop, sizeof (mtpt_prop), NULL, NULL, 0, B_FALSE) == 0); if (is_manual) { ret = zfs_unmount(zhp, NULL, flags); } else if (strcmp(mtpt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot unmount " "'%s': legacy mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use umount(8) " "to unmount this filesystem\n")); } else { ret = zfs_unmountall(zhp, flags); } } out: zfs_close(zhp); return (ret != 0); } /* * Generic callback for unsharing or unmounting a filesystem. */ static int unshare_unmount(int op, int argc, char **argv) { int do_all = 0; int flags = 0; int ret = 0; int c; zfs_handle_t *zhp; char nfs_mnt_prop[ZFS_MAXPROPLEN]; char sharesmb[ZFS_MAXPROPLEN]; /* check options */ while ((c = getopt(argc, argv, op == OP_SHARE ? "a" : "af")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'f': flags = MS_FORCE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (do_all) { /* * We could make use of zfs_for_each() to walk all datasets in * the system, but this would be very inefficient, especially * since we would have to linearly search /etc/mnttab for each * one. Instead, do one pass through /etc/mnttab looking for * zfs entries and call zfs_unmount() for each one. * * Things get a little tricky if the administrator has created * mountpoints beneath other ZFS filesystems. 
In this case, we * have to unmount the deepest filesystems first. To accomplish * this, we place all the mountpoints in an AVL tree sorted by * the special type (dataset name), and walk the result in * reverse to make sure to get any snapshots first. */ struct mnttab entry; uu_avl_pool_t *pool; uu_avl_t *tree = NULL; unshare_unmount_node_t *node; uu_avl_index_t idx; uu_avl_walk_t *walk; if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (((pool = uu_avl_pool_create("unmount_pool", sizeof (unshare_unmount_node_t), offsetof(unshare_unmount_node_t, un_avlnode), unshare_unmount_compare, UU_DEFAULT)) == NULL) || ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL)) nomem(); rewind(mnttab_file); while (getmntent(mnttab_file, &entry) == 0) { /* ignore non-ZFS entries */ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; /* ignore snapshots */ if (strchr(entry.mnt_special, '@') != NULL) continue; if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; continue; } /* * Ignore datasets that are excluded/restricted by * parent pool name. */ if (zpool_skip_pool(zfs_get_pool_name(zhp))) { zfs_close(zhp); continue; } switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") != 0) break; verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0) continue; break; case OP_MOUNT: /* Ignore legacy mounts */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "legacy") == 0) continue; /* Ignore canmount=noauto mounts */ if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) continue; default: break; } node = safe_malloc(sizeof (unshare_unmount_node_t)); node->un_zhp = zhp; node->un_mountp = safe_strdup(entry.mnt_mountp); uu_avl_node_init(node, &node->un_avlnode, pool); if (uu_avl_find(tree, node, NULL, &idx) == NULL) { uu_avl_insert(tree, node, idx); } else { zfs_close(node->un_zhp); free(node->un_mountp); free(node); } } /* * Walk the AVL tree in reverse, unmounting each filesystem and * removing it from the AVL tree in the process. */ if ((walk = uu_avl_walk_start(tree, UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(tree, node); switch (op) { case OP_SHARE: if (zfs_unshareall_bypath(node->un_zhp, node->un_mountp) != 0) ret = 1; break; case OP_MOUNT: if (zfs_unmount(node->un_zhp, node->un_mountp, flags) != 0) ret = 1; break; } zfs_close(node->un_zhp); free(node->un_mountp); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(tree); uu_avl_pool_destroy(pool); } else { if (argc != 1) { if (argc == 0) (void) fprintf(stderr, gettext("missing filesystem argument\n")); else (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* * We have an argument, but it may be a full path or a ZFS * filesystem. Pass full paths off to unmount_path() (shared by * manual_unmount), otherwise open the filesystem and pass to * zfs_unmount(). */ if (argv[0][0] == '/') return (unshare_unmount_path(op, argv[0], flags, B_FALSE)); if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) return (1); verify(zfs_prop_get(zhp, op == OP_SHARE ? 
ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, sharesmb, sizeof (sharesmb), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(sharesmb, "off") == 0) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': legacy share\n"), zfs_get_name(zhp)); #ifdef illumos (void) fprintf(stderr, gettext("use " "unshare(1M) to unshare this " "filesystem\n")); #endif ret = 1; } else if (!zfs_is_shared(zhp)) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': not currently " "shared\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unshareall(zhp) != 0) { ret = 1; } break; case OP_MOUNT: if (strcmp(nfs_mnt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': legacy " "mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use " "umount(8) to unmount this " "filesystem\n")); ret = 1; } else if (!zfs_is_mounted(zhp, NULL)) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': not currently " "mounted\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unmountall(zhp, flags) != 0) { ret = 1; } break; } zfs_close(zhp); } return (ret); } /* * zfs unmount -a * zfs unmount filesystem * * Unmount all filesystems, or a specific ZFS filesystem. */ static int zfs_do_unmount(int argc, char **argv) { return (unshare_unmount(OP_MOUNT, argc, argv)); } /* * zfs unshare -a * zfs unshare filesystem * * Unshare all filesystems, or a specific ZFS filesystem. */ static int zfs_do_unshare(int argc, char **argv) { return (unshare_unmount(OP_SHARE, argc, argv)); } /* * Attach/detach the given dataset to/from the given jail */ /* ARGSUSED */ static int do_jail(int argc, char **argv, int attach) { zfs_handle_t *zhp; int jailid, ret; /* check number of arguments */ if (argc < 3) { (void) fprintf(stderr, gettext("missing argument(s)\n")); usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } jailid = jail_getid(argv[1]); if (jailid < 0) { (void) fprintf(stderr, gettext("invalid jail id or name\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM); if (zhp == NULL) return (1); ret = (zfs_jail(zhp, jailid, attach) != 0); zfs_close(zhp); return (ret); } /* * zfs jail jailid filesystem * * Attach the given dataset to the given jail */ /* ARGSUSED */ static int zfs_do_jail(int argc, char **argv) { return (do_jail(argc, argv, 1)); } /* * zfs unjail jailid filesystem * * Detach the given dataset from the given jail */ /* ARGSUSED */ static int zfs_do_unjail(int argc, char **argv) { return (do_jail(argc, argv, 0)); } /* * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is * 'legacy'. Otherwise, complain that use should be using 'zfs mount'. 
*/ static int manual_mount(int argc, char **argv) { zfs_handle_t *zhp; char mountpoint[ZFS_MAXPROPLEN]; char mntopts[MNT_LINE_MAX] = { '\0' }; int ret = 0; int c; int flags = 0; char *dataset, *path; /* check options */ while ((c = getopt(argc, argv, ":mo:O")) != -1) { switch (c) { case 'o': (void) strlcpy(mntopts, optarg, sizeof (mntopts)); break; case 'O': flags |= MS_OVERLAY; break; case 'm': flags |= MS_NOMNTTAB; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); (void) fprintf(stderr, gettext("usage: mount [-o opts] " "<path>\n")); return (2); } } argc -= optind; argv += optind; /* check that we only have two arguments */ if (argc != 2) { if (argc == 0) (void) fprintf(stderr, gettext("missing dataset " "argument\n")); else if (argc == 1) (void) fprintf(stderr, gettext("missing mountpoint argument\n")); else (void) fprintf(stderr, gettext("too many arguments\n")); (void) fprintf(stderr, "usage: mount <dataset> <mountpoint>\n"); return (2); } dataset = argv[0]; path = argv[1]; /* try to open the dataset */ if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); (void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE); /* check for legacy mountpoint and complain appropriately */ ret = 0; if (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) { if (zmount(dataset, path, flags, MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) { (void) fprintf(stderr, gettext("mount failed: %s\n"), strerror(errno)); ret = 1; } } else { (void) fprintf(stderr, gettext("filesystem '%s' cannot be " "mounted using 'mount -t zfs'\n"), dataset); (void) fprintf(stderr, gettext("Use 'zfs set mountpoint=%s' " "instead.\n"), path); (void) fprintf(stderr, gettext("If you must use 'mount -t zfs' " "or /etc/fstab, use 'zfs set mountpoint=legacy'.\n")); (void) fprintf(stderr, gettext("See zfs(8) for more " "information.\n")); ret = 1; } return (ret); } /* * Called when invoked as /etc/fs/zfs/umount. Unlike a manual mount, we allow * unmounts of non-legacy filesystems, as this is the dominant administrative * interface.
*/ static int manual_unmount(int argc, char **argv) { int flags = 0; int c; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': flags = MS_FORCE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); (void) fprintf(stderr, gettext("usage: unmount [-f] " "<path>\n")); return (2); } } argc -= optind; argv += optind; /* check arguments */ if (argc != 1) { if (argc == 0) (void) fprintf(stderr, gettext("missing path " "argument\n")); else (void) fprintf(stderr, gettext("too many arguments\n")); (void) fprintf(stderr, gettext("usage: unmount [-f] <path>\n")); return (2); } return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE)); } static int find_command_idx(char *command, int *idx) { int i; for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) continue; if (strcmp(command, command_table[i].name) == 0) { *idx = i; return (0); } } return (1); } static int zfs_do_diff(int argc, char **argv) { zfs_handle_t *zhp; int flags = 0; char *tosnap = NULL; char *fromsnap = NULL; char *atp, *copy; int err = 0; int c; while ((c = getopt(argc, argv, "FHt")) != -1) { switch (c) { case 'F': flags |= ZFS_DIFF_CLASSIFY; break; case 'H': flags |= ZFS_DIFF_PARSEABLE; break; case 't': flags |= ZFS_DIFF_TIMESTAMP; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("must provide at least one snapshot name\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } fromsnap = argv[0]; tosnap = (argc == 2) ? argv[1] : NULL; copy = NULL; if (*fromsnap != '@') copy = strdup(fromsnap); else if (tosnap) copy = strdup(tosnap); if (copy == NULL) usage(B_FALSE); if ((atp = strchr(copy, '@')) != NULL) *atp = '\0'; if ((zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); free(copy); /* * Ignore SIGPIPE so that the library can give us * information on any failure */ (void) sigignore(SIGPIPE); err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags); zfs_close(zhp); return (err != 0); } /* * zfs remap <filesystem | volume> * * Remap the indirect blocks in the given filesystem or volume. */ static int zfs_do_remap(int argc, char **argv) { const char *fsname; int err = 0; int c; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (argc != 2) { (void) fprintf(stderr, gettext("wrong number of arguments\n")); usage(B_FALSE); } fsname = argv[1]; err = zfs_remap_indirects(g_zfs, fsname); return (err); } /* * zfs bookmark <fs@snap> <fs#bmark> * * Creates a bookmark with the given name from the given snapshot. */ static int zfs_do_bookmark(int argc, char **argv) { char snapname[ZFS_MAX_DATASET_NAME_LEN]; zfs_handle_t *zhp; nvlist_t *nvl; int ret = 0; int c; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing bookmark argument\n")); goto usage; } if (strchr(argv[1], '#') == NULL) { (void) fprintf(stderr, gettext("invalid bookmark name '%s' -- " "must contain a '#'\n"), argv[1]); goto usage; } if (argv[0][0] == '@') { /* * Snapshot name begins with @.
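* e.g. "zfs bookmark @snap1 pool/fs#mark" names the snapshot pool/fs@snap1.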
* Default to same fs as bookmark. */ (void) strncpy(snapname, argv[1], sizeof (snapname)); *strchr(snapname, '#') = '\0'; (void) strlcat(snapname, argv[0], sizeof (snapname)); } else { (void) strncpy(snapname, argv[0], sizeof (snapname)); } zhp = zfs_open(g_zfs, snapname, ZFS_TYPE_SNAPSHOT); if (zhp == NULL) goto usage; zfs_close(zhp); nvl = fnvlist_alloc(); fnvlist_add_string(nvl, argv[1], snapname); ret = lzc_bookmark(nvl, NULL); fnvlist_free(nvl); if (ret != 0) { const char *err_msg; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create bookmark '%s'"), argv[1]); switch (ret) { case EXDEV: err_msg = "bookmark is in a different pool"; break; case EEXIST: err_msg = "bookmark exists"; break; case EINVAL: err_msg = "invalid argument"; break; case ENOTSUP: err_msg = "bookmark feature not enabled"; break; case ENOSPC: err_msg = "out of space"; break; default: err_msg = "unknown error"; break; } (void) fprintf(stderr, "%s: %s\n", errbuf, dgettext(TEXT_DOMAIN, err_msg)); } return (ret != 0); usage: usage(B_FALSE); return (-1); } static int zfs_do_channel_program(int argc, char **argv) { int ret, fd; char c; char *progbuf, *filename, *poolname; size_t progsize, progread; nvlist_t *outnvl; uint64_t instrlimit = ZCP_DEFAULT_INSTRLIMIT; uint64_t memlimit = ZCP_DEFAULT_MEMLIMIT; boolean_t sync_flag = B_TRUE, json_output = B_FALSE; zpool_handle_t *zhp; /* check options */ while (-1 != (c = getopt(argc, argv, "jnt:(instr-limit)m:(memory-limit)"))) { switch (c) { case 't': case 'm': { uint64_t arg; char *endp; errno = 0; arg = strtoull(optarg, &endp, 0); if (errno != 0 || *endp != '\0') { (void) fprintf(stderr, gettext( "invalid argument " "'%s': expected integer\n"), optarg); goto usage; } if (c == 't') { if (arg > ZCP_MAX_INSTRLIMIT || arg == 0) { (void) fprintf(stderr, gettext( "Invalid instruction limit: " "%s\n"), optarg); return (1); } else { instrlimit = arg; } } else { ASSERT3U(c, ==, 'm'); if (arg > ZCP_MAX_MEMLIMIT || arg == 0) { (void) fprintf(stderr, gettext( "Invalid memory limit: " "%s\n"), optarg); return (1); } else { memlimit = arg; } } break; } case 'n': { sync_flag = B_FALSE; break; } case 'j': { json_output = B_TRUE; break; } case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; if (argc < 2) { (void) fprintf(stderr, gettext("invalid number of arguments\n")); goto usage; } poolname = argv[0]; filename = argv[1]; if (strcmp(filename, "-") == 0) { fd = 0; filename = "standard input"; } else if ((fd = open(filename, O_RDONLY)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), filename, strerror(errno)); return (1); } if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { (void) fprintf(stderr, gettext("cannot open pool '%s'"), poolname); return (1); } zpool_close(zhp); /* * Read in the channel program, expanding the program buffer as * necessary. */ progread = 0; progsize = 1024; progbuf = safe_malloc(progsize); do { ret = read(fd, progbuf + progread, progsize - progread); progread += ret; if (progread == progsize && ret > 0) { progsize *= 2; progbuf = safe_realloc(progbuf, progsize); } } while (ret > 0); if (fd != 0) (void) close(fd); if (ret < 0) { free(progbuf); (void) fprintf(stderr, gettext("cannot read '%s': %s\n"), filename, strerror(errno)); return (1); } progbuf[progread] = '\0'; /* * Any remaining arguments are passed as arguments to the lua script as * a string array: * { * "argv" -> [ "arg 1", ... 
"arg n" ], * } */ nvlist_t *argnvl = fnvlist_alloc(); fnvlist_add_string_array(argnvl, ZCP_ARG_CLIARGV, argv + 2, argc - 2); if (sync_flag) { ret = lzc_channel_program(poolname, progbuf, instrlimit, memlimit, argnvl, &outnvl); } else { ret = lzc_channel_program_nosync(poolname, progbuf, instrlimit, memlimit, argnvl, &outnvl); } if (ret != 0) { /* * On error, report the error message handed back by lua if one * exists. Otherwise, generate an appropriate error message, * falling back on strerror() for an unexpected return code. */ char *errstring = NULL; if (nvlist_exists(outnvl, ZCP_RET_ERROR)) { (void) nvlist_lookup_string(outnvl, ZCP_RET_ERROR, &errstring); if (errstring == NULL) errstring = strerror(ret); } else { switch (ret) { case EINVAL: errstring = "Invalid instruction or memory limit."; break; case ENOMEM: errstring = "Return value too large."; break; case ENOSPC: errstring = "Memory limit exhausted."; break; #ifdef illumos case ETIME: #else case ETIMEDOUT: #endif errstring = "Timed out."; break; case EPERM: errstring = "Permission denied. Channel " "programs must be run as root."; break; default: errstring = strerror(ret); } } (void) fprintf(stderr, gettext("Channel program execution failed:\n%s\n"), errstring); } else { if (json_output) { (void) nvlist_print_json(stdout, outnvl); } else if (nvlist_empty(outnvl)) { (void) fprintf(stdout, gettext("Channel program fully " "executed and did not produce output.\n")); } else { (void) fprintf(stdout, gettext("Channel program fully " "executed and produced output:\n")); dump_nvlist(outnvl, 4); } } free(progbuf); fnvlist_free(outnvl); fnvlist_free(argnvl); return (ret != 0); usage: usage(B_FALSE); return (-1); } int main(int argc, char **argv) { int ret = 0; int i; char *progname; char *cmdname; (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); opterr = 0; if ((g_zfs = libzfs_init()) == NULL) { (void) fprintf(stderr, gettext("internal error: failed to " "initialize ZFS library\n")); return (1); } zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); libzfs_print_on_error(g_zfs, B_TRUE); if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) { (void) fprintf(stderr, gettext("internal error: unable to " "open %s\n"), MNTTAB); return (1); } /* * This command also doubles as the /etc/fs mount and unmount program. * Determine if we should take this behavior based on argv[0]. */ progname = basename(argv[0]); if (strcmp(progname, "mount") == 0) { ret = manual_mount(argc, argv); } else if (strcmp(progname, "umount") == 0) { ret = manual_unmount(argc, argv); } else { /* * Make sure the user has specified some command. */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); usage(B_FALSE); } cmdname = argv[1]; /* * The 'umount' command is an alias for 'unmount' */ if (strcmp(cmdname, "umount") == 0) cmdname = "unmount"; /* * The 'recv' command is an alias for 'receive' */ if (strcmp(cmdname, "recv") == 0) cmdname = "receive"; /* * The 'snap' command is an alias for 'snapshot' */ if (strcmp(cmdname, "snap") == 0) cmdname = "snapshot"; /* * Special case '-?' */ if (strcmp(cmdname, "-?") == 0) usage(B_TRUE); /* * Run the appropriate command. 
*/ libzfs_mnttab_cache(g_zfs, B_TRUE); if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, argv + 1); } else if (strchr(cmdname, '=') != NULL) { verify(find_command_idx("set", &i) == 0); current_command = &command_table[i]; ret = command_table[i].func(argc, argv); } else { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); usage(B_FALSE); } libzfs_mnttab_cache(g_zfs, B_FALSE); } (void) fclose(mnttab_file); if (ret == 0 && log_history) (void) zpool_log_history(g_zfs, history_str); libzfs_fini(g_zfs); /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } return (ret); } Index: projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs =================================================================== --- projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs (revision 352536) +++ projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs (revision 352537) Property changes on: projects/clang900-import/cddl/contrib/opensolaris/cmd/zfs ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris/cmd/zfs:r351317-352536 Index: projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h =================================================================== --- projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h (revision 352536) +++ projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h (revision 352537) @@ -1,864 +1,864 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Martin Matuska . All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2017 Datto Inc. 
*/ #ifndef _LIBZFS_H #define _LIBZFS_H #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Miscellaneous ZFS constants */ #define ZFS_MAXPROPLEN MAXPATHLEN #define ZPOOL_MAXPROPLEN MAXPATHLEN /* * libzfs errors */ typedef enum zfs_error { EZFS_SUCCESS = 0, /* no error -- success */ EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ EZFS_PROPTYPE, /* property does not apply to dataset type */ EZFS_PROPNONINHERIT, /* property is not inheritable */ EZFS_PROPSPACE, /* bad quota or reservation */ EZFS_BADTYPE, /* dataset is not of appropriate type */ EZFS_BUSY, /* pool or dataset is busy */ EZFS_EXISTS, /* pool or dataset already exists */ EZFS_NOENT, /* no such pool or dataset */ EZFS_BADSTREAM, /* bad backup stream */ EZFS_DSREADONLY, /* dataset is readonly */ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ EZFS_INVALIDNAME, /* invalid dataset name */ EZFS_BADRESTORE, /* unable to restore to destination */ EZFS_BADBACKUP, /* backup failed */ EZFS_BADTARGET, /* bad attach/detach/replace target */ EZFS_NODEVICE, /* no such device in pool */ EZFS_BADDEV, /* invalid device to add */ EZFS_NOREPLICAS, /* no valid replicas */ EZFS_RESILVERING, /* currently resilvering */ EZFS_BADVERSION, /* unsupported version */ EZFS_POOLUNAVAIL, /* pool is currently unavailable */ EZFS_DEVOVERFLOW, /* too many devices in one vdev */ EZFS_BADPATH, /* must be an absolute path */ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ EZFS_ZONED, /* used improperly in local zone */ EZFS_MOUNTFAILED, /* failed to mount dataset */ EZFS_UMOUNTFAILED, /* failed to unmount dataset */ EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ EZFS_SHARENFSFAILED, /* share(1M) failed */ EZFS_PERM, /* permission denied */ EZFS_NOSPC, /* out of space */ EZFS_FAULT, /* bad address */ EZFS_IO, /* I/O error */ EZFS_INTR, /* signal received */ EZFS_ISSPARE, /* device is a hot spare */ EZFS_INVALCONFIG, /* invalid vdev configuration */ EZFS_RECURSIVE, /* recursive dependency */ EZFS_NOHISTORY, /* no history object */ EZFS_POOLPROPS, /* couldn't retrieve pool props */ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ EZFS_NAMETOOLONG, /* dataset name is too long */ EZFS_OPENFAILED, /* open of device failed */ EZFS_NOCAP, /* couldn't get capacity */ EZFS_LABELFAILED, /* write of label failed */ EZFS_BADWHO, /* invalid permission who */ EZFS_BADPERM, /* invalid permission */ EZFS_BADPERMSET, /* invalid permission set name */ EZFS_NODELEGATION, /* delegated administration is disabled */ EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ EZFS_SHARESMBFAILED, /* failed to share over smb */ EZFS_BADCACHE, /* bad cache file */ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_NOTSUP, /* ops not supported on this dataset */ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ EZFS_PIPEFAILED, /* pipe create failed */ EZFS_THREADCREATEFAILED, /* thread create failed */ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ EZFS_SCRUBBING, /* currently scrubbing */ EZFS_NO_SCRUB, /* no active 
scrub */ EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */ EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */ EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */ EZFS_NO_CHECKPOINT, /* pool has no checkpoint */ EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */ EZFS_VDEV_TOO_BIG, /* a device is too big to be used */ EZFS_TOOMANY, /* argument list too long */ EZFS_INITIALIZING, /* currently initializing */ EZFS_NO_INITIALIZE, /* no active initialize */ EZFS_UNKNOWN } zfs_error_t; /* * UEFI boot support parameters. When creating whole disk boot pool, * zpool create should allow to create EFI System partition for UEFI boot * program. In case of BIOS, the EFI System partition is not used * even if it does exist. */ typedef enum zpool_boot_label { ZPOOL_NO_BOOT_LABEL = 0, ZPOOL_CREATE_BOOT_LABEL, ZPOOL_COPY_BOOT_LABEL } zpool_boot_label_t; /* * The following data structures are all part * of the zfs_allow_t data structure which is * used for printing 'allow' permissions. * It is a linked list of zfs_allow_t's which * then contain avl tree's for user/group/sets/... * and each one of the entries in those trees have * avl tree's for the permissions they belong to and * whether they are local,descendent or local+descendent * permissions. The AVL trees are used primarily for * sorting purposes, but also so that we can quickly find * a given user and or permission. */ typedef struct zfs_perm_node { avl_node_t z_node; char z_pname[MAXPATHLEN]; } zfs_perm_node_t; typedef struct zfs_allow_node { avl_node_t z_node; char z_key[MAXPATHLEN]; /* name, such as joe */ avl_tree_t z_localdescend; /* local+descendent perms */ avl_tree_t z_local; /* local permissions */ avl_tree_t z_descend; /* descendent permissions */ } zfs_allow_node_t; typedef struct zfs_allow { struct zfs_allow *z_next; char z_setpoint[MAXPATHLEN]; avl_tree_t z_sets; avl_tree_t z_crperms; avl_tree_t z_user; avl_tree_t z_group; avl_tree_t z_everyone; } zfs_allow_t; /* * Basic handle types */ typedef struct zfs_handle zfs_handle_t; typedef struct zpool_handle zpool_handle_t; typedef struct libzfs_handle libzfs_handle_t; /* * Library initialization */ extern libzfs_handle_t *libzfs_init(void); extern void libzfs_fini(libzfs_handle_t *); extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); extern void zfs_save_arguments(int argc, char **, char *, int); extern int zpool_log_history(libzfs_handle_t *, const char *); extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); extern int zfs_standard_error(libzfs_handle_t *, int, const char *); extern void libzfs_mnttab_init(libzfs_handle_t *); extern void libzfs_mnttab_fini(libzfs_handle_t *); extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, struct mnttab *); extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, const char *, const char *); extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); /* * Basic handle functions */ extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, 
const char *); extern void zpool_close(zpool_handle_t *); extern const char *zpool_get_name(zpool_handle_t *); extern int zpool_get_state(zpool_handle_t *); extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t); extern const char *zpool_pool_state_to_name(pool_state_t); extern void zpool_free_handles(libzfs_handle_t *); extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *); /* * Iterate over all active pools in the system. */ typedef int (*zpool_iter_f)(zpool_handle_t *, void *); extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); extern boolean_t zpool_skip_pool(const char *); /* * Functions to create and destroy pools */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); extern int zpool_destroy(zpool_handle_t *, const char *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { /* do not split, but return the config that would be split off */ int dryrun : 1; /* after splitting, import the pool */ int import : 1; } splitflags_t; /* * Functions to manipulate pool and vdev state */ extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); extern int zpool_reopen(zpool_handle_t *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int); extern int zpool_vdev_detach(zpool_handle_t *, const char *); extern int zpool_vdev_remove(zpool_handle_t *, const char *); extern int zpool_vdev_remove_cancel(zpool_handle_t *); extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *); extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *, zpool_boot_label_t, uint64_t, int *); /* * Functions to manage pool properties */ extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, size_t proplen, zprop_source_t *, boolean_t); extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, zprop_source_t *); extern const char *zpool_prop_to_name(zpool_prop_t); extern const char *zpool_prop_values(zpool_prop_t); /* * Pool health statistics. */ typedef enum { /* * The following correspond to faults as defined in the (fault.fs.zfs.*) * event namespace. Each is associated with a corresponding message ID. 
*/ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ /* * If the pool has unsupported features but can still be opened in * read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the * pool has unsupported features but cannot be opened at all, its * status is ZPOOL_STATUS_UNSUP_FEAT_READ. */ ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */ ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */ /* * These faults have no corresponding message ID. At the time we are * checking the status, the original reason for the FMA fault (I/O or * checksum errors) has been lost. */ ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ /* * The following are not faults per se, but still an error possibly * requiring administrative attention. There is no corresponding * message ID. */ ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */ ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */ ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ZPOOL_STATUS_OFFLINE_DEV, /* device offline */ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */ /* * Finally, the following indicates a healthy pool. */ ZPOOL_STATUS_OK } zpool_status_t; extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); extern zpool_status_t zpool_import_status(nvlist_t *, char **); extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* * Statistics and configuration functions. */ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern nvlist_t *zpool_get_features(zpool_handle_t *); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); extern boolean_t zpool_is_bootable(zpool_handle_t *); /* * Import and export functions */ extern int zpool_export(zpool_handle_t *, boolean_t, const char *); extern int zpool_export_force(zpool_handle_t *, const char *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, nvlist_t *, int); extern void zpool_print_unsup_feat(nvlist_t *config); /* * Search for pools to import */ typedef struct importargs { char **path; /* a list of paths to search */ int paths; /* number of paths to search */ char *poolname; /* name of a pool to find */ uint64_t guid; /* guid of a pool to find */ char *cachefile; /* cachefile to use for import */ int can_be_active : 1; /* can the pool be active? */ int unique : 1; /* does 'poolname' already exist? 
*/ int exists : 1; /* set on return if pool already exists */ nvlist_t *policy; /* load policy (max txg, rewind, etc.) */ } importargs_t; extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); /* legacy pool search routines */ extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, char *, uint64_t); /* * Miscellaneous pool functions */ struct zfs_cmd; extern const char *zfs_history_event_names[]; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, boolean_t verbose); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); extern int zfs_ioctl(libzfs_handle_t *, int request, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, nvlist_t *); extern int zpool_checkpoint(zpool_handle_t *); extern int zpool_discard_checkpoint(zpool_handle_t *); /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. */ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); extern const char *zfs_get_pool_name(const zfs_handle_t *); /* * Property management functions. Some functions are shared with the kernel, * and are found in sys/fs/zfs.h. 
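A minimal sketch (illustrative, not part of the library) of filling importargs_t for zpool_search_import(): scan one directory for importable pools. The returned nvlist, if non-NULL, is expected to be keyed by pool name and must be freed by the caller with nvlist_free().

#include <libzfs.h>

static nvlist_t *
find_pools_under(libzfs_handle_t *hdl, char *dir)
{
	importargs_t args = { 0 };
	char *paths[1];

	paths[0] = dir;
	args.path = paths;	/* directories to scan */
	args.paths = 1;		/* number of entries in path[] */

	return (zpool_search_import(hdl, &args));
}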
*/ /* * zfs dataset property management */ extern const char *zfs_prop_default_string(zfs_prop_t); extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_set_list(zfs_handle_t *, nvlist_t *); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zprop_source_t *, char *, size_t); extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, char *buf, size_t len); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); typedef struct zprop_list { int pl_prop; char *pl_user_prop; struct zprop_list *pl_next; boolean_t pl_all; size_t pl_width; size_t pl_recvd_width; boolean_t pl_fixed; } zprop_list_t; extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" #define ZFS_MOUNTPOINT_LEGACY "legacy" #define ZFS_FEATURE_DISABLED "disabled" #define ZFS_FEATURE_ENABLED "enabled" #define ZFS_FEATURE_ACTIVE "active" #define ZFS_UNSUPPORTED_INACTIVE "inactive" #define ZFS_UNSUPPORTED_READONLY "readonly" /* * zpool property management */ extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **); extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, size_t); extern const char *zpool_prop_default_string(zpool_prop_t); extern uint64_t zpool_prop_default_numeric(zpool_prop_t); extern const char *zpool_prop_column_name(zpool_prop_t); extern boolean_t zpool_prop_align_right(zpool_prop_t); /* * Functions shared by zfs and zpool property management. 
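A small usage sketch for the property getters declared above (not part of the header): fetch a dataset's mountpoint as a string, ignoring source information, and its used space as a number.

#include <stdio.h>
#include <libzfs.h>

static int
print_basic_props(zfs_handle_t *zhp)
{
	char mnt[256];	/* a real caller would size this for the longest mountpoint */
	uint64_t used;

	/* The final boolean_t selects literal (raw) output; B_FALSE keeps the default. */
	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mnt, sizeof (mnt),
	    NULL, NULL, 0, B_FALSE) != 0)
		return (-1);
	used = zfs_prop_get_int(zhp, ZFS_PROP_USED);
	(void) printf("%s mounted at %s, %llu bytes used\n",
	    zfs_get_name(zhp), mnt, (u_longlong_t)used);
	return (0);
}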
*/ extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type); extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); extern void zprop_free_list(zprop_list_t *); #define ZFS_GET_NCOLS 5 typedef enum { GET_COL_NONE, GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_RECVD, GET_COL_SOURCE } zfs_get_column_t; /* * Functions for printing zfs or zpool properties */ typedef struct zprop_get_cbdata { int cb_sources; zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; int cb_colwidths[ZFS_GET_NCOLS + 1]; boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; zprop_list_t *cb_proplist; zfs_type_t cb_type; } zprop_get_cbdata_t; void zprop_print_one_property(const char *, zprop_get_cbdata_t *, const char *, const char *, zprop_source_t, const char *, const char *); /* * Iterator functions. */ typedef int (*zfs_iter_f)(zfs_handle_t *, void *); extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); extern int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *); typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; } get_all_cb_t; void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t, zfs_iter_f, void*, boolean_t); void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); /* * Functions to create and destroy datasets. */ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, nvlist_t *); extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); typedef struct renameflags { /* recursive rename */ int recurse : 1; /* don't unmount file systems */ int nounmount : 1; /* force unmount file systems */ int forceunmount : 1; } renameflags_t; extern int zfs_rename(zfs_handle_t *, const char *, const char *, renameflags_t flags); typedef struct sendflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* recursive send (ie, -R) */ boolean_t replicate; /* for incrementals, do all intermediate snapshots */ boolean_t doall; /* if dataset is a clone, do incremental from its origin */ boolean_t fromorigin; /* do deduplication */ boolean_t dedup; /* send properties (ie, -p) */ boolean_t props; /* do not send (no-op, ie. -n) */ boolean_t dryrun; /* parsable verbose output (ie. -P) */ boolean_t parsable; /* show progress (ie. -v) */ boolean_t progress; /* large blocks (>128K) are permitted */ boolean_t largeblock; /* WRITE_EMBEDDED records of type DATA are permitted */ boolean_t embed_data; /* compressed WRITE records are permitted */ boolean_t compress; /* show progress as process title(ie. 
-V) */ boolean_t progressastitle; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); -extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags); +extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t flags); extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, const char *); extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, int); extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, zfs_userspace_cb_t, void *); extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); typedef struct recvflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* the destination is a prefix, not the exact fs (ie, -d) */ boolean_t isprefix; /* * Only the tail of the sent snapshot path is appended to the * destination to determine the received snapshot name (ie, -e). */ boolean_t istail; /* do not actually do the recv, just check if it would work (ie, -n) */ boolean_t dryrun; /* rollback/destroy filesystems as necessary (eg, -F) */ boolean_t force; /* set "canmount=off" on all modified filesystems */ boolean_t canmountoff; /* * Mark the file systems as "resumable" and do not destroy them if the * receive is interrupted */ boolean_t resumable; /* byteswap flag is used internally; callers need not specify */ boolean_t byteswap; /* do not mount file systems as they are extracted (private) */ boolean_t nomount; } recvflags_t; extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 0x1, ZFS_DIFF_TIMESTAMP = 0x2, ZFS_DIFF_CLASSIFY = 0x4 } diff_flags_t; extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, int); /* * Miscellaneous functions. */ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); extern boolean_t zfs_bookmark_exists(const char *path); /* * Mount support functions. */ extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); extern int zfs_mount(zfs_handle_t *, const char *, int); extern int zfs_unmount(zfs_handle_t *, const char *, int); extern int zfs_unmountall(zfs_handle_t *, int); /* * Share support functions. */ extern boolean_t zfs_is_shared(zfs_handle_t *); extern int zfs_share(zfs_handle_t *); extern int zfs_unshare(zfs_handle_t *); /* * Protocol-specific share support functions. 
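To illustrate the signature change above, a minimal caller sketch (not from the source tree), assuming zhp is an open handle for the snapshot to send, from is the optional incremental source, and fd is the output descriptor: zfs_send_one() now takes the caller's sendflags_t by value instead of a pre-built enum lzc_send_flags mask.

#include <libzfs.h>

static int
send_single_snapshot(zfs_handle_t *zhp, const char *from, int fd)
{
	sendflags_t flags = { 0 };

	flags.largeblock = B_TRUE;	/* allow blocks larger than 128K in the stream */
	flags.embed_data = B_TRUE;	/* allow WRITE_EMBEDDED records */
	flags.compress = B_TRUE;	/* allow compressed WRITE records */

	/* With this change the flags struct is passed directly. */
	return (zfs_send_one(zhp, from, fd, flags));
}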
*/ extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); extern int zfs_share_nfs(zfs_handle_t *); extern int zfs_share_smb(zfs_handle_t *); extern int zfs_shareall(zfs_handle_t *); extern int zfs_unshare_nfs(zfs_handle_t *, const char *); extern int zfs_unshare_smb(zfs_handle_t *, const char *); extern int zfs_unshareall_nfs(zfs_handle_t *); extern int zfs_unshareall_smb(zfs_handle_t *); extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); extern int zfs_unshareall(zfs_handle_t *); extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); /* * FreeBSD-specific jail support function. */ extern int zfs_jail(zfs_handle_t *, int, int); /* * When dealing with nvlists, verify() is extremely useful */ #ifndef verify #ifdef NDEBUG #define verify(EX) ((void)(EX)) #else #define verify(EX) assert(EX) #endif #endif /* * Utility function to convert a number to a human-readable form. */ extern void zfs_nicenum(uint64_t, char *, size_t); extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); /* * Given a device or file, determine if it is part of a pool. */ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, boolean_t *); /* * Label manipulation. */ extern int zpool_read_label(int, nvlist_t **); extern int zpool_read_all_labels(int, nvlist_t **); extern int zpool_clear_label(int); /* is this zvol valid for use as a dump device? */ extern int zvol_check_dump_config(char *); /* * Management interfaces for SMB ACL files */ int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); /* * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); /* * Mappings between vdev and FRU. */ extern void libzfs_fru_refresh(libzfs_handle_t *); extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, const char *); extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); #ifndef illumos extern int zmount(const char *, const char *, int, char *, char *, int, char *, int); #endif extern int zfs_remap_indirects(libzfs_handle_t *hdl, const char *); /* Allow consumers to initialize libshare externally for optimal performance */ extern int zfs_init_libshare_arg(libzfs_handle_t *, int, void *); /* * For most consumers, zfs_init_libshare_arg is sufficient on its own, and * zfs_uninit_libshare is unnecessary. zfs_uninit_libshare should only be called * if the caller has already initialized libshare for one set of zfs handles, * and wishes to share or unshare filesystems outside of that set. In that case, * the caller should uninitialize libshare, and then re-initialize it with the * new handles being shared or unshared. 
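A short sketch of the verify() idiom described above (illustrative): nvlist construction calls that cannot reasonably fail are wrapped so that a failure trips an assertion in debug builds, while under NDEBUG the expression is still evaluated (unlike a plain assert(), which would discard it).

#include <libzfs.h>

static nvlist_t *
make_tag_nvlist(void)
{
	nvlist_t *nv = NULL;

	/* verify() keeps the side effect even when assertions are compiled out. */
	verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0);
	verify(nvlist_add_string(nv, "comment", "example") == 0);
	verify(nvlist_add_uint64(nv, "generation", 1) == 0);
	return (nv);
}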
*/ extern void zfs_uninit_libshare(libzfs_handle_t *); #ifdef __cplusplus } #endif #endif /* _LIBZFS_H */ Index: projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c =================================================================== --- projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c (revision 352536) +++ projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c (revision 352537) @@ -1,3834 +1,3855 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Igor Kozhukhov */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_fletcher.h" #include "libzfs_impl.h" #include #include #include #include #ifdef __FreeBSD__ extern int zfs_ioctl_version; #endif /* in libzfs_dataset.c */ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); /* We need to use something for ENODATA. 
*/ #define ENODATA EIDRM static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *, recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *, const char *); static int guid_to_name(libzfs_handle_t *, const char *, uint64_t, boolean_t, char *); static const zio_cksum_t zero_cksum = { 0 }; typedef struct dedup_arg { int inputfd; int outputfd; libzfs_handle_t *dedup_hdl; } dedup_arg_t; typedef struct progress_arg { zfs_handle_t *pa_zhp; int pa_fd; boolean_t pa_parsable; boolean_t pa_astitle; uint64_t pa_size; } progress_arg_t; typedef struct dataref { uint64_t ref_guid; uint64_t ref_object; uint64_t ref_offset; } dataref_t; typedef struct dedup_entry { struct dedup_entry *dde_next; zio_cksum_t dde_chksum; uint64_t dde_prop; dataref_t dde_ref; } dedup_entry_t; #define MAX_DDT_PHYSMEM_PERCENT 20 #define SMALLEST_POSSIBLE_MAX_DDT_MB 128 typedef struct dedup_table { dedup_entry_t **dedup_hash_array; umem_cache_t *ddecache; uint64_t max_ddt_size; /* max dedup table size in bytes */ uint64_t cur_ddt_size; /* current dedup table size in bytes */ uint64_t ddt_count; int numhashbits; boolean_t ddt_full; } dedup_table_t; static int high_order_bit(uint64_t n) { int count; for (count = 0; n != 0; count++) n >>= 1; return (count); } static size_t ssread(void *buf, size_t len, FILE *stream) { size_t outlen; if ((outlen = fread(buf, len, 1, stream)) == 0) return (0); return (outlen); } static void ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { dedup_entry_t *dde; if (ddt->cur_ddt_size >= ddt->max_ddt_size) { if (ddt->ddt_full == B_FALSE) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Dedup table full. Deduplication will continue " "with existing table entries")); ddt->ddt_full = B_TRUE; } return; } if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT)) != NULL) { assert(*ddepp == NULL); dde->dde_next = NULL; dde->dde_chksum = *cs; dde->dde_prop = prop; dde->dde_ref = *dr; *ddepp = dde; ddt->cur_ddt_size += sizeof (dedup_entry_t); ddt->ddt_count++; } } /* * Using the specified dedup table, do a lookup for an entry with * the checksum cs. If found, return the block's reference info * in *dr. Otherwise, insert a new entry in the dedup table, using * the reference information specified by *dr. * * return value: true - entry was found * false - entry was not found */ static boolean_t ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { uint32_t hashcode; dedup_entry_t **ddepp; hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits); for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL; ddepp = &((*ddepp)->dde_next)) { if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) && (*ddepp)->dde_prop == prop) { *dr = (*ddepp)->dde_ref; return (B_TRUE); } } ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr); return (B_FALSE); } static int dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, zio_cksum_t *zc, int outfd) { ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); (void) fletcher_4_incremental_native(drr, offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); if (drr->drr_type != DRR_BEGIN) { ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. 
drr_checksum.drr_checksum)); drr->drr_u.drr_checksum.drr_checksum = *zc; } (void) fletcher_4_incremental_native( &drr->drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), zc); if (write(outfd, drr, sizeof (*drr)) == -1) return (errno); if (payload_len != 0) { (void) fletcher_4_incremental_native(payload, payload_len, zc); if (write(outfd, payload, payload_len) == -1) return (errno); } return (0); } /* * This function is started in a separate thread when the dedup option * has been requested. The main send thread determines the list of * snapshots to be included in the send stream and makes the ioctl calls * for each one. But instead of having the ioctl send the output to the * the output fd specified by the caller of zfs_send()), the * ioctl is told to direct the output to a pipe, which is read by the * alternate thread running THIS function. This function does the * dedup'ing by: * 1. building a dedup table (the DDT) * 2. doing checksums on each data block and inserting a record in the DDT * 3. looking for matching checksums, and * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever * a duplicate block is found. * The output of this function then goes to the output fd requested * by the caller of zfs_send(). */ static void * cksummer(void *arg) { dedup_arg_t *dda = arg; char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; FILE *ofp; int outfd; dedup_table_t ddt; zio_cksum_t stream_cksum; uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); uint64_t numbuckets; ddt.max_ddt_size = MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100, SMALLEST_POSSIBLE_MAX_DDT_MB << 20); numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t)); /* * numbuckets must be a power of 2. Increase number to * a power of 2 if necessary. */ if (!ISP2(numbuckets)) numbuckets = 1 << high_order_bit(numbuckets); ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *)); ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *); ddt.numhashbits = high_order_bit(numbuckets) - 1; ddt.ddt_full = B_FALSE; outfd = dda->outputfd; ofp = fdopen(dda->inputfd, "r"); while (ssread(drr, sizeof (*drr), ofp) != 0) { /* * kernel filled in checksum, we are going to write same * record, but need to regenerate checksum. 
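A worked example of the dedup-table bucket sizing done a few lines above in cksummer() (illustrative numbers only):

/*
 * Example: if max_ddt_size / sizeof (dedup_entry_t) yields 1000 buckets,
 * high_order_bit(1000) == 10 (1000 needs 10 bits), so numbuckets is rounded
 * up to 1 << 10 == 1024.  numhashbits is then high_order_bit(1024) - 1 == 10,
 * i.e. the low 10 bits of the checksum index the 1024 hash buckets.
 */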
*/ if (drr->drr_type != DRR_BEGIN) { bzero(&drr->drr_u.drr_checksum.drr_checksum, sizeof (drr->drr_u.drr_checksum.drr_checksum)); } switch (drr->drr_type) { case DRR_BEGIN: { struct drr_begin *drrb = &drr->drr_u.drr_begin; int fflags; int sz = 0; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); /* set the DEDUP feature flag for this stream */ fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); fflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); if (drr->drr_payloadlen != 0) { sz = drr->drr_payloadlen; if (sz > SPA_MAXBLOCKSIZE) { buf = zfs_realloc(dda->dedup_hdl, buf, SPA_MAXBLOCKSIZE, sz); } (void) ssread(buf, sz, ofp); if (ferror(stdin)) perror("fread"); } if (dump_record(drr, buf, sz, &stream_cksum, outfd) != 0) goto out; break; } case DRR_END: { struct drr_end *drre = &drr->drr_u.drr_end; /* use the recalculated checksum */ drre->drr_checksum = stream_cksum; if (dump_record(drr, NULL, 0, &stream_cksum, outfd) != 0) goto out; break; } case DRR_OBJECT: { struct drr_object *drro = &drr->drr_u.drr_object; if (drro->drr_bonuslen > 0) { (void) ssread(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), ofp); } if (dump_record(drr, buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), &stream_cksum, outfd) != 0) goto out; break; } case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; (void) ssread(buf, drrs->drr_length, ofp); if (dump_record(drr, buf, drrs->drr_length, &stream_cksum, outfd) != 0) goto out; break; } case DRR_FREEOBJECTS: { if (dump_record(drr, NULL, 0, &stream_cksum, outfd) != 0) goto out; break; } case DRR_WRITE: { struct drr_write *drrw = &drr->drr_u.drr_write; dataref_t dataref; uint64_t payload_size; payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); (void) ssread(buf, payload_size, ofp); /* * Use the existing checksum if it's dedup-capable, * else calculate a SHA256 checksum for it. 
*/ if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { SHA256_CTX ctx; zio_cksum_t tmpsha256; SHA256Init(&ctx); SHA256Update(&ctx, buf, payload_size); SHA256Final(&tmpsha256, &ctx); drrw->drr_key.ddk_cksum.zc_word[0] = BE_64(tmpsha256.zc_word[0]); drrw->drr_key.ddk_cksum.zc_word[1] = BE_64(tmpsha256.zc_word[1]); drrw->drr_key.ddk_cksum.zc_word[2] = BE_64(tmpsha256.zc_word[2]); drrw->drr_key.ddk_cksum.zc_word[3] = BE_64(tmpsha256.zc_word[3]); drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; } dataref.ref_guid = drrw->drr_toguid; dataref.ref_object = drrw->drr_object; dataref.ref_offset = drrw->drr_offset; if (ddt_update(dda->dedup_hdl, &ddt, &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, &dataref)) { dmu_replay_record_t wbr_drr = {0}; struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; /* block already present in stream */ wbr_drr.drr_type = DRR_WRITE_BYREF; wbr_drrr->drr_object = drrw->drr_object; wbr_drrr->drr_offset = drrw->drr_offset; wbr_drrr->drr_length = drrw->drr_logical_size; wbr_drrr->drr_toguid = drrw->drr_toguid; wbr_drrr->drr_refguid = dataref.ref_guid; wbr_drrr->drr_refobject = dataref.ref_object; wbr_drrr->drr_refoffset = dataref.ref_offset; wbr_drrr->drr_checksumtype = drrw->drr_checksumtype; wbr_drrr->drr_checksumflags = drrw->drr_checksumtype; wbr_drrr->drr_key.ddk_cksum = drrw->drr_key.ddk_cksum; wbr_drrr->drr_key.ddk_prop = drrw->drr_key.ddk_prop; if (dump_record(&wbr_drr, NULL, 0, &stream_cksum, outfd) != 0) goto out; } else { /* block not previously seen */ if (dump_record(drr, buf, payload_size, &stream_cksum, outfd) != 0) goto out; } break; } case DRR_WRITE_EMBEDDED: { struct drr_write_embedded *drrwe = &drr->drr_u.drr_write_embedded; (void) ssread(buf, P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp); if (dump_record(drr, buf, P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), &stream_cksum, outfd) != 0) goto out; break; } case DRR_FREE: { if (dump_record(drr, NULL, 0, &stream_cksum, outfd) != 0) goto out; break; } default: (void) fprintf(stderr, "INVALID record type 0x%x\n", drr->drr_type); /* should never happen, so assert */ assert(B_FALSE); } } out: umem_cache_destroy(ddt.ddecache); free(ddt.dedup_hash_array); free(buf); (void) fclose(ofp); return (NULL); } /* * Routines for dealing with the AVL tree of fs-nvlists */ typedef struct fsavl_node { avl_node_t fn_node; nvlist_t *fn_nvfs; char *fn_snapname; uint64_t fn_guid; } fsavl_node_t; static int fsavl_compare(const void *arg1, const void *arg2) { const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1; const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2; return (AVL_CMP(fn1->fn_guid, fn2->fn_guid)); } /* * Given the GUID of a snapshot, find its containing filesystem and * (optionally) name. 
*/ static nvlist_t * fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname) { fsavl_node_t fn_find; fsavl_node_t *fn; fn_find.fn_guid = snapguid; fn = avl_find(avl, &fn_find, NULL); if (fn) { if (snapname) *snapname = fn->fn_snapname; return (fn->fn_nvfs); } return (NULL); } static void fsavl_destroy(avl_tree_t *avl) { fsavl_node_t *fn; void *cookie; if (avl == NULL) return; cookie = NULL; while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL) free(fn); avl_destroy(avl); free(avl); } /* * Given an nvlist, produce an avl tree of snapshots, ordered by guid */ static avl_tree_t * fsavl_create(nvlist_t *fss) { avl_tree_t *fsavl; nvpair_t *fselem = NULL; if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL) return (NULL); avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t), offsetof(fsavl_node_t, fn_node)); while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) { nvlist_t *nvfs, *snaps; nvpair_t *snapelem = NULL; VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); while ((snapelem = nvlist_next_nvpair(snaps, snapelem)) != NULL) { fsavl_node_t *fn; uint64_t guid; VERIFY(0 == nvpair_value_uint64(snapelem, &guid)); if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) { fsavl_destroy(fsavl); return (NULL); } fn->fn_nvfs = nvfs; fn->fn_snapname = nvpair_name(snapelem); fn->fn_guid = guid; /* * Note: if there are multiple snaps with the * same GUID, we ignore all but one. */ if (avl_find(fsavl, fn, NULL) == NULL) avl_add(fsavl, fn); else free(fn); } } return (fsavl); } /* * Routines for dealing with the giant nvlist of fs-nvlists, etc. */ typedef struct send_data { /* * assigned inside every recursive call, * restored from *_save on return: * * guid of fromsnap snapshot in parent dataset * txg of fromsnap snapshot in current dataset * txg of tosnap snapshot in current dataset */ uint64_t parent_fromsnap_guid; uint64_t fromsnap_txg; uint64_t tosnap_txg; /* the nvlists get accumulated during depth-first traversal */ nvlist_t *parent_snaps; nvlist_t *fss; nvlist_t *snapprops; /* send-receive configuration, does not change during traversal */ const char *fsname; const char *fromsnap; const char *tosnap; boolean_t recursive; boolean_t verbose; /* * The header nvlist is of the following format: * { * "tosnap" -> string * "fromsnap" -> string (if incremental) * "fss" -> { * id -> { * * "name" -> string (full name; for debugging) * "parentfromsnap" -> number (guid of fromsnap in parent) * * "props" -> { name -> value (only if set here) } * "snaps" -> { name (lastname) -> number (guid) } * "snapprops" -> { name (lastname) -> { name -> value } } * * "origin" -> number (guid) (if clone) * "sent" -> boolean (not on-disk) * } * } * } * */ } send_data_t; static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv); static int send_iterate_snap(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; uint64_t guid = zhp->zfs_dmustats.dds_guid; uint64_t txg = zhp->zfs_dmustats.dds_creation_txg; char *snapname; nvlist_t *nv; snapname = strrchr(zhp->zfs_name, '@')+1; if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) { if (sd->verbose) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "skipping snapshot %s because it was created " "after the destination snapshot (%s)\n"), zhp->zfs_name, sd->tosnap); } zfs_close(zhp); return (0); } VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid)); /* * NB: if there is no fromsnap here (it's a newly created fs in * an incremental replication), we will substitute the tosnap. 
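A small consumer-side sketch (not from the source) of walking the header nvlist whose layout is documented above: each entry of "fss" is itself an nvlist holding the per-filesystem "name", "props", and "snaps" members.

#include <stdio.h>
#include <libnvpair.h>

static void
print_stream_filesystems(nvlist_t *hdrnv)
{
	nvlist_t *fss;
	nvpair_t *elem = NULL;

	if (nvlist_lookup_nvlist(hdrnv, "fss", &fss) != 0)
		return;
	while ((elem = nvlist_next_nvpair(fss, elem)) != NULL) {
		nvlist_t *nvfs;
		char *name;

		/* The pair name is the guid string; "name" is the full dataset name. */
		if (nvpair_value_nvlist(elem, &nvfs) == 0 &&
		    nvlist_lookup_string(nvfs, "name", &name) == 0)
			(void) printf("%s -> %s\n", nvpair_name(elem), name);
	}
}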
*/ if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) || (sd->parent_fromsnap_guid == 0 && sd->tosnap && strcmp(snapname, sd->tosnap) == 0)) { sd->parent_fromsnap_guid = guid; } VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv)); nvlist_free(nv); zfs_close(zhp); return (0); } static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) { nvpair_t *elem = NULL; while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { char *propname = nvpair_name(elem); zfs_prop_t prop = zfs_name_to_prop(propname); nvlist_t *propnv; if (!zfs_prop_user(propname)) { /* * Realistically, this should never happen. However, * we want the ability to add DSL properties without * needing to make incompatible version changes. We * need to ignore unknown properties to allow older * software to still send datasets containing these * properties, with the unknown properties elided. */ if (prop == ZPROP_INVAL) continue; if (zfs_prop_readonly(prop)) continue; } verify(nvpair_value_nvlist(elem, &propnv) == 0); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) { char *source; uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) continue; /* * May have no source before SPA_VERSION_RECVD_PROPS, * but is still modifiable. */ if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) == 0) { if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } } else { char *source; if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) != 0) continue; if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } if (zfs_prop_user(propname) || zfs_prop_get_type(prop) == PROP_TYPE_STRING) { char *value; verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_string(nv, propname, value)); } else { uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_uint64(nv, propname, value)); } } } /* * returns snapshot creation txg * and returns 0 if the snapshot does not exist */ static uint64_t get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap) { char name[ZFS_MAX_DATASET_NAME_LEN]; uint64_t txg = 0; if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0') return (txg); (void) snprintf(name, sizeof (name), "%s@%s", fs, snap); if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) { zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT); if (zhp != NULL) { txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG); zfs_close(zhp); } } return (txg); } /* * recursively generate nvlists describing datasets. See comment * for the data structure send_data_t above for description of contents * of the nvlist. 
*/ static int send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; nvlist_t *nvfs, *nv; int rv = 0; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t fromsnap_txg_save = sd->fromsnap_txg; uint64_t tosnap_txg_save = sd->tosnap_txg; uint64_t txg = zhp->zfs_dmustats.dds_creation_txg; uint64_t guid = zhp->zfs_dmustats.dds_guid; uint64_t fromsnap_txg, tosnap_txg; char guidstring[64]; fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap); if (fromsnap_txg != 0) sd->fromsnap_txg = fromsnap_txg; tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap); if (tosnap_txg != 0) sd->tosnap_txg = tosnap_txg; /* * on the send side, if the current dataset does not have tosnap, * perform two additional checks: * * - skip sending the current dataset if it was created later than * the parent tosnap * - return error if the current dataset was created earlier than * the parent tosnap */ if (sd->tosnap != NULL && tosnap_txg == 0) { if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) { if (sd->verbose) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "skipping dataset %s: snapshot %s does " "not exist\n"), zhp->zfs_name, sd->tosnap); } } else { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "cannot send %s@%s%s: snapshot %s@%s does not " "exist\n"), sd->fsname, sd->tosnap, sd->recursive ? dgettext(TEXT_DOMAIN, " recursively") : "", zhp->zfs_name, sd->tosnap); rv = -1; } goto out; } VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name)); VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap", sd->parent_fromsnap_guid)); if (zhp->zfs_dmustats.dds_origin[0]) { zfs_handle_t *origin = zfs_open(zhp->zfs_hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); if (origin == NULL) { rv = -1; goto out; } VERIFY(0 == nvlist_add_uint64(nvfs, "origin", origin->zfs_dmustats.dds_guid)); } /* iterate over props */ VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); nvlist_free(nv); /* iterate over snaps, and set sd->parent_fromsnap_guid */ sd->parent_fromsnap_guid = 0; VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0)); (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd); VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps)); VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops)); nvlist_free(sd->parent_snaps); nvlist_free(sd->snapprops); /* add this fs to nvlist */ (void) snprintf(guidstring, sizeof (guidstring), "0x%llx", (longlong_t)guid); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); nvlist_free(nvfs); /* iterate over children */ if (sd->recursive) rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); out: sd->parent_fromsnap_guid = parent_fromsnap_guid_save; sd->fromsnap_txg = fromsnap_txg_save; sd->tosnap_txg = tosnap_txg_save; zfs_close(zhp); return (rv); } static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, const char *tosnap, boolean_t recursive, boolean_t verbose, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; send_data_t sd = { 0 }; int error; zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (EZFS_BADTYPE); VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); sd.fsname = fsname; sd.fromsnap = fromsnap; sd.tosnap = tosnap; sd.recursive = recursive; sd.verbose = verbose; if ((error = send_iterate_fs(zhp, &sd)) != 0) { 
nvlist_free(sd.fss); if (avlp != NULL) *avlp = NULL; *nvlp = NULL; return (error); } if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) { nvlist_free(sd.fss); *nvlp = NULL; return (EZFS_NOMEM); } *nvlp = sd.fss; return (0); } /* * Routines specific to "zfs send" */ typedef struct send_dump_data { /* these are all just the short snapname (the part after the @) */ const char *fromsnap; const char *tosnap; char prevsnap[ZFS_MAX_DATASET_NAME_LEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; boolean_t progressastitle; boolean_t large_block, compress; int outfd; boolean_t err; nvlist_t *fss; nvlist_t *snapholds; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; nvlist_t *debugnv; char holdtag[ZFS_MAX_DATASET_NAME_LEN]; int cleanup_fd; uint64_t size; } send_dump_data_t; static int estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, boolean_t fromorigin, enum lzc_send_flags flags, uint64_t *sizep) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); assert(fromsnap_obj == 0 || !fromorigin); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_obj = fromorigin; zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zc.zc_fromobj = fromsnap_obj; zc.zc_guid = 1; /* estimate flag */ zc.zc_flags = flags; if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot estimate space for '%s'"), zhp->zfs_name); switch (errno) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source (@%s) does not exist"), zc.zc_value); } return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } *sizep = zc.zc_objset_type; return (0); } /* * Dumps a backup of the given snapshot (incremental from fromsnap if it's not * NULL) to the file descriptor specified by outfd. 
*/ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, boolean_t fromorigin, int outfd, enum lzc_send_flags flags, nvlist_t *debugnv) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *thisdbg; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); assert(fromsnap_obj == 0 || !fromorigin); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_cookie = outfd; zc.zc_obj = fromorigin; zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zc.zc_fromobj = fromsnap_obj; zc.zc_flags = flags; VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); if (fromsnap && fromsnap[0] != '\0') { VERIFY(0 == nvlist_add_string(thisdbg, "fromsnap", fromsnap)); } if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno)); if (debugnv) { VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); } nvlist_free(thisdbg); switch (errno) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source (@%s) does not exist"), zc.zc_value); } return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: #ifdef illumos case ENOSTR: #endif case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } if (debugnv) VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); nvlist_free(thisdbg); return (0); } static void gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) { assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); /* * zfs_send() only sets snapholds for sends that need them, * e.g. replication and doall. */ if (sdd->snapholds == NULL) return; fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); } static void * send_progress_thread(void *arg) { progress_arg_t *pa = arg; zfs_cmd_t zc = { 0 }; zfs_handle_t *zhp = pa->pa_zhp; libzfs_handle_t *hdl = zhp->zfs_hdl; unsigned long long bytes, total; char buf[16]; time_t t; struct tm *tm; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (!pa->pa_parsable && !pa->pa_astitle) (void) fprintf(stderr, "TIME SENT SNAPSHOT\n"); /* * Print the progress from ZFS_IOC_SEND_PROGRESS every second. 
*/ for (;;) { (void) sleep(1); zc.zc_cookie = pa->pa_fd; if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) return ((void *)-1); (void) time(&t); tm = localtime(&t); bytes = zc.zc_cookie; if (pa->pa_astitle) { int pct; if (pa->pa_size > bytes) pct = 100 * bytes / pa->pa_size; else pct = 100; setproctitle("sending %s (%d%%: %llu/%llu)", zhp->zfs_name, pct, bytes, pa->pa_size); } else if (pa->pa_parsable) { (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", tm->tm_hour, tm->tm_min, tm->tm_sec, bytes, zhp->zfs_name); } else { zfs_nicenum(bytes, buf, sizeof (buf)); (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", tm->tm_hour, tm->tm_min, tm->tm_sec, buf, zhp->zfs_name); } } } static void send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap, uint64_t size, boolean_t parsable) { if (parsable) { if (fromsnap != NULL) { (void) fprintf(fout, "incremental\t%s\t%s", fromsnap, tosnap); } else { (void) fprintf(fout, "full\t%s", tosnap); } } else { if (fromsnap != NULL) { if (strchr(fromsnap, '@') == NULL && strchr(fromsnap, '#') == NULL) { (void) fprintf(fout, dgettext(TEXT_DOMAIN, "send from @%s to %s"), fromsnap, tosnap); } else { (void) fprintf(fout, dgettext(TEXT_DOMAIN, "send from %s to %s"), fromsnap, tosnap); } } else { (void) fprintf(fout, dgettext(TEXT_DOMAIN, "full send of %s"), tosnap); } } - if (size != 0) { - if (parsable) { - (void) fprintf(fout, "\t%llu", - (longlong_t)size); - } else { - char buf[16]; - zfs_nicenum(size, buf, sizeof (buf)); - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - " estimated size is %s"), buf); - } + if (parsable) { + (void) fprintf(fout, "\t%llu", + (longlong_t)size); + } else if (size != 0) { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + " estimated size is %s"), buf); } (void) fprintf(fout, "\n"); } static int dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; char *thissnap; enum lzc_send_flags flags = 0; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; FILE *fout = sdd->std_out ? stdout : stderr; uint64_t size = 0; err = 0; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { gather_holds(zhp, sdd); sdd->seenfrom = B_TRUE; (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); return (0); } if (sdd->seento || !sdd->seenfrom) { zfs_close(zhp); return (0); } istosnap = (strcmp(sdd->tosnap, thissnap) == 0); if (istosnap) sdd->seento = B_TRUE; if (sdd->large_block) flags |= LZC_SEND_FLAG_LARGE_BLOCK; if (sdd->embed_data) flags |= LZC_SEND_FLAG_EMBED_DATA; if (sdd->compress) flags |= LZC_SEND_FLAG_COMPRESS; if (!sdd->doall && !isfromsnap && !istosnap) { if (sdd->replicate) { char *snapname; nvlist_t *snapprops; /* * Filter out all intermediate snapshots except origin * snapshots needed to replicate clones. */ nvlist_t *nvfs = fsavl_find(sdd->fsavl, zhp->zfs_dmustats.dds_guid, &snapname); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, thissnap, &snapprops)); exclude = !nvlist_exists(snapprops, "is_clone_origin"); } else { exclude = B_TRUE; } } /* * If a filter function exists, call it to determine whether * this snapshot will be sent. 
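The send_print_verbose() hunk above changes what parsable (-P) verbose output looks like: the trailing size column is now emitted unconditionally, while human-readable output is unchanged and still appends an estimate only when it is non-zero. Illustrative lines (values made up):

	incremental	<fromsnap>	<tosnap>	0	(parsable; size column now present even when the estimate is 0)
	full	<tosnap>	1048576			(parsable, non-zero estimate)
	full send of <tosnap> estimated size is 1M	(human-readable; size text still omitted when the estimate is 0)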
*/ if (exclude || (sdd->filter_cb != NULL && sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { /* * This snapshot is filtered out. Don't send it, and don't * set prevsnap_obj, so it will be as if this snapshot didn't * exist, and the next accepted snapshot will be sent as * an incremental from the last accepted one, or as the * first (and full) snapshot in the case of a replication, * non-incremental send. */ zfs_close(zhp); return (0); } gather_holds(zhp, sdd); fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); if (sdd->progress && sdd->dryrun) { (void) estimate_ioctl(zhp, sdd->prevsnap_obj, fromorigin, flags, &size); sdd->size += size; } if (sdd->verbose) { send_print_verbose(fout, zhp->zfs_name, sdd->prevsnap[0] ? sdd->prevsnap : NULL, size, sdd->parsable); } if (!sdd->dryrun) { /* * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. */ if (sdd->progress) { pa.pa_zhp = zhp; pa.pa_fd = sdd->outfd; pa.pa_parsable = sdd->parsable; pa.pa_size = sdd->size; pa.pa_astitle = sdd->progressastitle; if ((err = pthread_create(&tid, NULL, send_progress_thread, &pa)) != 0) { zfs_close(zhp); return (err); } } err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, fromorigin, sdd->outfd, flags, sdd->debugnv); if (sdd->progress) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); } } (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); return (err); } static int dump_filesystem(zfs_handle_t *zhp, void *arg) { int rv = 0; send_dump_data_t *sdd = arg; boolean_t missingfrom = B_FALSE; zfs_cmd_t zc = { 0 }; (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->tosnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); sdd->err = B_TRUE; return (0); } if (sdd->replicate && sdd->fromsnap) { /* * If this fs does not have fromsnap, and we're doing * recursive, we need to send a full stream from the * beginning (or an incremental from the origin if this * is a clone). If we're doing non-recursive, then let * them get the error. */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->fromsnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { missingfrom = B_TRUE; } } sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; sdd->prevsnap_obj = 0; if (sdd->fromsnap == NULL || missingfrom) sdd->seenfrom = B_TRUE; rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); if (!sdd->seenfrom) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) does not exist\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); sdd->err = B_TRUE; } else if (!sdd->seento) { if (sdd->fromsnap) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) " "is not earlier than it\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); } else { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: " "could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); } sdd->err = B_TRUE; } return (rv); } static int dump_filesystems(zfs_handle_t *rzhp, void *arg) { send_dump_data_t *sdd = arg; nvpair_t *fspair; boolean_t needagain, progress; if (!sdd->replicate) return (dump_filesystem(rzhp, sdd)); /* Mark the clone origin snapshots. 
*/ for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *nvfs; uint64_t origin_guid = 0; VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs)); (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid); if (origin_guid != 0) { char *snapname; nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, &snapname); if (origin_nv != NULL) { nvlist_t *snapprops; VERIFY(0 == nvlist_lookup_nvlist(origin_nv, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, snapname, &snapprops)); VERIFY(0 == nvlist_add_boolean( snapprops, "is_clone_origin")); } } } again: needagain = progress = B_FALSE; for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *fslist, *parent_nv; char *fsname; zfs_handle_t *zhp; int err; uint64_t origin_guid = 0; uint64_t parent_guid = 0; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); if (nvlist_lookup_boolean(fslist, "sent") == 0) continue; VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); (void) nvlist_lookup_uint64(fslist, "parentfromsnap", &parent_guid); if (parent_guid != 0) { parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL); if (!nvlist_exists(parent_nv, "sent")) { /* parent has not been sent; skip this one */ needagain = B_TRUE; continue; } } if (origin_guid != 0) { nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL); if (origin_nv != NULL && !nvlist_exists(origin_nv, "sent")) { /* * origin has not been sent yet; * skip this clone. */ needagain = B_TRUE; continue; } } zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); err = dump_filesystem(zhp, sdd); VERIFY(nvlist_add_boolean(fslist, "sent") == 0); progress = B_TRUE; zfs_close(zhp); if (err) return (err); } if (needagain) { assert(progress); goto again; } /* clean out the sent flags in case we reuse this fss */ for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *fslist; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); (void) nvlist_remove_all(fslist, "sent"); } return (0); } nvlist_t * zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token) { unsigned int version; int nread; unsigned long long checksum, packed_len; /* * Decode token header, which is: * -- * Note that the only supported token version is 1. 
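For reference, a caller-side sketch (not from the source) of the decode path implemented below, assuming the token string was obtained from a partially received dataset's receive_resume_token property: the header is "<version>-<checksum>-<payload length>-" with the last two fields in hex, followed by the hex-encoded, compressed, packed nvlist.

#include <stdio.h>
#include <libzfs.h>

static void
show_resume_token(libzfs_handle_t *hdl, const char *token)
{
	nvlist_t *nv = zfs_send_resume_token_to_nvlist(hdl, token);

	if (nv == NULL) {
		(void) fprintf(stderr, "token did not decode\n");
		return;
	}
	nvlist_print(stdout, nv);	/* shows toname, toguid, object, offset, bytes, ... */
	nvlist_free(nv);
}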
*/ nread = sscanf(token, "%u-%llx-%llx-", &version, &checksum, &packed_len); if (nread != 3) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "resume token is corrupt (invalid format)")); return (NULL); } if (version != ZFS_SEND_RESUME_TOKEN_VERSION) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "resume token is corrupt (invalid version %u)"), version); return (NULL); } /* convert hexadecimal representation to binary */ token = strrchr(token, '-') + 1; int len = strlen(token) / 2; unsigned char *compressed = zfs_alloc(hdl, len); for (int i = 0; i < len; i++) { nread = sscanf(token + i * 2, "%2hhx", compressed + i); if (nread != 1) { free(compressed); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "resume token is corrupt " "(payload is not hex-encoded)")); return (NULL); } } /* verify checksum */ zio_cksum_t cksum; fletcher_4_native(compressed, len, NULL, &cksum); if (cksum.zc_word[0] != checksum) { free(compressed); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "resume token is corrupt (incorrect checksum)")); return (NULL); } /* uncompress */ void *packed = zfs_alloc(hdl, packed_len); uLongf packed_len_long = packed_len; if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK || packed_len_long != packed_len) { free(packed); free(compressed); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "resume token is corrupt (decompression failed)")); return (NULL); } /* unpack nvlist */ nvlist_t *nv; int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP); free(packed); free(compressed); if (error != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "resume token is corrupt (nvlist_unpack failed)")); return (NULL); } return (nv); } int zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, const char *resume_token) { char errbuf[1024]; char *toname; char *fromname = NULL; uint64_t resumeobj, resumeoff, toguid, fromguid, bytes; zfs_handle_t *zhp; int error = 0; char name[ZFS_MAX_DATASET_NAME_LEN]; enum lzc_send_flags lzc_flags = 0; uint64_t size = 0; FILE *fout = (flags->verbose && flags->dryrun) ? 
stdout : stderr; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot resume send")); nvlist_t *resume_nvl = zfs_send_resume_token_to_nvlist(hdl, resume_token); if (resume_nvl == NULL) { /* * zfs_error_aux has already been set by * zfs_send_resume_token_to_nvlist */ return (zfs_error(hdl, EZFS_FAULT, errbuf)); } if (flags->verbose) { (void) fprintf(fout, dgettext(TEXT_DOMAIN, "resume token contents:\n")); nvlist_print(fout, resume_nvl); } if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 || nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 || nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 || nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 || nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "resume token is corrupt")); return (zfs_error(hdl, EZFS_FAULT, errbuf)); } fromguid = 0; (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid); if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok")) lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; if (flags->embed_data || nvlist_exists(resume_nvl, "embedok")) lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; if (flags->compress || nvlist_exists(resume_nvl, "compressok")) lzc_flags |= LZC_SEND_FLAG_COMPRESS; if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is no longer the same snapshot used in " "the initial send"), toname); } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' used in the initial send no longer exists"), toname); } return (zfs_error(hdl, EZFS_BADPATH, errbuf)); } zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "unable to access '%s'"), name); return (zfs_error(hdl, EZFS_BADPATH, errbuf)); } if (fromguid != 0) { if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source %#llx no longer exists"), (longlong_t)fromguid); return (zfs_error(hdl, EZFS_BADPATH, errbuf)); } fromname = name; } if (flags->progress) { error = lzc_send_space(zhp->zfs_name, fromname, lzc_flags, &size); if (error == 0) size = MAX(0, (int64_t)(size - bytes)); } if (flags->verbose) { send_print_verbose(fout, zhp->zfs_name, fromname, size, flags->parsable); } if (!flags->dryrun) { progress_arg_t pa = { 0 }; pthread_t tid; /* * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. 
*/ if (flags->progress) { pa.pa_zhp = zhp; pa.pa_fd = outfd; pa.pa_parsable = flags->parsable; pa.pa_size = size; pa.pa_astitle = flags->progressastitle; error = pthread_create(&tid, NULL, send_progress_thread, &pa); if (error != 0) { zfs_close(zhp); return (error); } } error = lzc_send_resume(zhp->zfs_name, fromname, outfd, lzc_flags, resumeobj, resumeoff); if (flags->progress) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); } char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); zfs_close(zhp); switch (error) { case 0: return (0); case EXDEV: case ENOENT: case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: #ifdef illumos case ENOSTR: #endif case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } zfs_close(zhp); return (error); } /* * Generate a send stream for the dataset identified by the argument zhp. * * The content of the send stream is the snapshot identified by * 'tosnap'. Incremental streams are requested in two ways: * - from the snapshot identified by "fromsnap" (if non-null) or * - from the origin of the dataset identified by zhp, which must * be a clone. In this case, "fromsnap" is null and "fromorigin" * is TRUE. * * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM) * if "replicate" is set. If "doall" is set, dump all the intermediate * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall" * case too. If "props" is set, send properties. */ int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sendflags_t *flags, int outfd, snapfilter_cb_t filter_func, void *cb_arg, nvlist_t **debugnvp) { char errbuf[1024]; send_dump_data_t sdd = { 0 }; int err = 0; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; pthread_t tid = 0; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; FILE *fout; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot send '%s'"), zhp->zfs_name); if (fromsnap && fromsnap[0] == '\0') { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "zero-length incremental source")); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); } if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { uint64_t version; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (version >= ZPL_VERSION_SA) { featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; } } if (flags->dedup && !flags->dryrun) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if ((err = pipe(pipefd)) != 0) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); } dda.outputfd = outfd; dda.inputfd = pipefd[1]; dda.dedup_hdl = zhp->zfs_hdl; if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) { (void) close(pipefd[0]); (void) close(pipefd[1]); zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, errbuf)); } } if (flags->replicate || flags->doall || flags->props) { dmu_replay_record_t drr = { 0 }; char *packbuf = NULL; size_t buflen = 0; zio_cksum_t zc = { 0 }; if (flags->replicate || flags->props) { nvlist_t *hdrnv; VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); if (fromsnap) { VERIFY(0 == nvlist_add_string(hdrnv, "fromsnap", fromsnap)); } VERIFY(0 == 
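/*
 * A standalone sketch of the progress-thread pattern used above: spawn a
 * poller before the blocking send, then cancel and join it afterwards.
 * sketch_progress_thread() only prints a message once a second; the real
 * thread polls ZFS_IOC_SEND_PROGRESS.  All names here are illustrative.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static void *
sketch_progress_thread(void *arg)
{
        const char *what = arg;

        for (;;) {
                /* sleep() is a cancellation point, so pthread_cancel works. */
                sleep(1);
                fprintf(stderr, "still sending %s...\n", what);
        }
        return (NULL);
}

static int
sketch_send_with_progress(const char *name, int (*do_send)(const char *))
{
        pthread_t tid;
        int err;

        if (pthread_create(&tid, NULL, sketch_progress_thread,
            (void *)name) != 0)
                return (-1);

        err = do_send(name);            /* the long-running, blocking part */

        (void) pthread_cancel(tid);     /* stop the poller ... */
        (void) pthread_join(tid, NULL); /* ... and reap it */
        return (err);
}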
nvlist_add_string(hdrnv, "tosnap", tosnap)); if (!flags->replicate) { VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, fromsnap, tosnap, flags->replicate, flags->verbose, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); err = nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR, 0); if (debugnvp) *debugnvp = hdrnv; else nvlist_free(hdrnv); if (err) goto stderr_out; } if (!flags->dryrun) { /* write first begin record */ drr.drr_type = DRR_BEGIN; drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. drr_versioninfo, DMU_COMPOUNDSTREAM); DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. drr_versioninfo, featureflags); (void) snprintf(drr.drr_u.drr_begin.drr_toname, sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", zhp->zfs_name, tosnap); drr.drr_payloadlen = buflen; err = dump_record(&drr, packbuf, buflen, &zc, outfd); free(packbuf); if (err != 0) goto stderr_out; /* write end record */ bzero(&drr, sizeof (drr)); drr.drr_type = DRR_END; drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { err = errno; goto stderr_out; } err = 0; } } /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; if (tid != 0) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; sdd.replicate = flags->replicate; sdd.doall = flags->doall; sdd.fromorigin = flags->fromorigin; sdd.fss = fss; sdd.fsavl = fsavl; sdd.verbose = flags->verbose; sdd.parsable = flags->parsable; sdd.progress = flags->progress; sdd.progressastitle = flags->progressastitle; sdd.dryrun = flags->dryrun; sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; sdd.compress = flags->compress; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) sdd.debugnv = *debugnvp; if (sdd.verbose && sdd.dryrun) sdd.std_out = B_TRUE; fout = sdd.std_out ? stdout : stderr; /* * Some flags require that we place user holds on the datasets that are * being sent so they don't get destroyed during the send. We can skip * this step if the pool is imported read-only since the datasets cannot * be destroyed. */ if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp), ZPOOL_PROP_READONLY, NULL) && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS && (flags->doall || flags->replicate)) { ++holdseq; (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag), ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); if (sdd.cleanup_fd < 0) { err = errno; goto stderr_out; } sdd.snapholds = fnvlist_alloc(); } else { sdd.cleanup_fd = -1; sdd.snapholds = NULL; } if (flags->progress || sdd.snapholds != NULL) { /* * Do a verbose no-op dry run to get all the verbose output * or to gather snapshot hold's before generating any data, * then do a non-verbose real run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); if (err != 0) goto stderr_out; if (flags->verbose) { if (flags->parsable) { (void) fprintf(fout, "size\t%llu\n", (longlong_t)sdd.size); } else { char buf[16]; zfs_nicenum(sdd.size, buf, sizeof (buf)); (void) fprintf(fout, dgettext(TEXT_DOMAIN, "total estimated size is %s\n"), buf); } } /* Ensure no snaps found is treated as an error. */ if (!sdd.seento) { err = ENOENT; goto err_out; } /* Skip the second run if dryrun was requested. 
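/*
 * A sketch of the framing written above for compound streams: a begin
 * record announcing the payload length, the packed payload itself, and an
 * end record carrying a checksum over everything written so far.  The
 * record layout and the trivial additive checksum are illustrative; the
 * real stream uses dmu_replay_record_t and fletcher-4.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>

struct sketch_record {
        uint32_t type;          /* 0 = begin, 1 = end */
        uint64_t payloadlen;    /* bytes following a begin record */
        uint64_t checksum;      /* filled in on the end record only */
};

static void
sketch_cksum(const void *buf, size_t len, uint64_t *ck)
{
        const unsigned char *p = buf;

        for (size_t i = 0; i < len; i++)
                *ck += p[i];
}

static int
sketch_write_stream(int fd, const void *payload, size_t len)
{
        struct sketch_record rec;
        uint64_t ck = 0;

        /* Begin record announces how much payload follows. */
        memset(&rec, 0, sizeof (rec));
        rec.type = 0;
        rec.payloadlen = len;
        sketch_cksum(&rec, sizeof (rec), &ck);
        if (write(fd, &rec, sizeof (rec)) != (ssize_t)sizeof (rec))
                return (-1);

        sketch_cksum(payload, len, &ck);
        if (write(fd, payload, len) != (ssize_t)len)
                return (-1);

        /* End record carries the running checksum for the receiver. */
        memset(&rec, 0, sizeof (rec));
        rec.type = 1;
        rec.checksum = ck;
        return (write(fd, &rec, sizeof (rec)) == (ssize_t)sizeof (rec) ?
            0 : -1);
}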
*/ if (flags->dryrun) goto err_out; if (sdd.snapholds != NULL) { err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); if (err != 0) goto stderr_out; fnvlist_free(sdd.snapholds); sdd.snapholds = NULL; } sdd.dryrun = B_FALSE; sdd.verbose = B_FALSE; } err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); /* Ensure no snaps found is treated as an error. */ if (err == 0 && !sdd.seento) err = ENOENT; if (tid != 0) { if (err != 0) (void) pthread_cancel(tid); (void) close(pipefd[0]); (void) pthread_join(tid, NULL); } if (sdd.cleanup_fd != -1) { VERIFY(0 == close(sdd.cleanup_fd)); sdd.cleanup_fd = -1; } if (!flags->dryrun && (flags->replicate || flags->doall || flags->props)) { /* * write final end record. NB: want to do this even if * there was some error, because it might not be totally * failed. */ dmu_replay_record_t drr = { 0 }; drr.drr_type = DRR_END; if (write(outfd, &drr, sizeof (drr)) == -1) { return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } return (err || sdd.err); stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: fsavl_destroy(fsavl); nvlist_free(fss); fnvlist_free(sdd.snapholds); if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); if (tid != 0) { (void) pthread_cancel(tid); (void) close(pipefd[0]); (void) pthread_join(tid, NULL); } return (err); } int -zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, - enum lzc_send_flags flags) +zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t flags) { - int err; + int err = 0; libzfs_handle_t *hdl = zhp->zfs_hdl; - + enum lzc_send_flags lzc_flags = 0; + FILE *fout = (flags.verbose && flags.dryrun) ? stdout : stderr; char errbuf[1024]; + + if (flags.largeblock) + lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; + if (flags.embed_data) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + if (flags.compress) + lzc_flags |= LZC_SEND_FLAG_COMPRESS; + + if (flags.verbose) { + uint64_t size = 0; + err = lzc_send_space(zhp->zfs_name, from, lzc_flags, &size); + if (err == 0) { + send_print_verbose(fout, zhp->zfs_name, from, size, + flags.parsable); + } else { + (void) fprintf(stderr, "Cannot estimate send size: " + "%s\n", strerror(errno)); + } + } + + if (flags.dryrun) + return (err); + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); - err = lzc_send(zhp->zfs_name, from, fd, flags); + err = lzc_send(zhp->zfs_name, from, fd, lzc_flags); if (err != 0) { switch (errno) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: case ESRCH: if (lzc_exists(zhp->zfs_name)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source (%s) does not exist"), from); } return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "target is busy; if a filesystem, " "it must not be mounted")); return (zfs_error(hdl, EZFS_BUSY, errbuf)); case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: #ifdef illumos case ENOSTR: #endif case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } return (err != 0); } /* * Routines specific to "zfs recv" */ static int recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, boolean_t byteswap, zio_cksum_t *zc) { char *cp = buf; int rv; int len = ilen; assert(ilen <= SPA_MAXBLOCKSIZE); do { rv = 
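/*
 * A sketch of the control flow introduced by the zfs_send_one() change
 * above: estimate (and optionally print) the stream size first, and return
 * before sending anything when only a dry run was requested.  estimate()
 * and transfer() are caller-supplied stand-ins, not libzfs calls.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int
sketch_send_one(const char *name, bool verbose, bool dryrun,
    int (*estimate)(const char *, uint64_t *),
    int (*transfer)(const char *))
{
        int err = 0;

        if (verbose) {
                uint64_t size = 0;

                err = estimate(name, &size);
                if (err == 0)
                        fprintf(stderr, "estimated size of %s: %llu bytes\n",
                            name, (unsigned long long)size);
                else
                        fprintf(stderr, "cannot estimate send size\n");
        }

        if (dryrun)
                return (err);           /* report only, send nothing */

        return (transfer(name));
}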
read(fd, cp, len); cp += rv; len -= rv; } while (rv > 0); if (rv < 0 || len != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to read from stream")); return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN, "cannot receive"))); } if (zc) { if (byteswap) (void) fletcher_4_incremental_byteswap(buf, ilen, zc); else (void) fletcher_4_incremental_native(buf, ilen, zc); } return (0); } static int recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, boolean_t byteswap, zio_cksum_t *zc) { char *buf; int err; buf = zfs_alloc(hdl, len); if (buf == NULL) return (ENOMEM); err = recv_read(hdl, fd, buf, len, byteswap, zc); if (err != 0) { free(buf); return (err); } err = nvlist_unpack(buf, len, nvp, 0); free(buf); if (err != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (malformed nvlist)")); return (EINVAL); } return (0); } static int recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, int baselen, char *newname, recvflags_t *flags) { static int seq; int err; prop_changelist_t *clp; zfs_handle_t *zhp; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags->force ? MS_FORCE : 0); zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); if (tryname) { (void) strcpy(newname, tryname); if (flags->verbose) { (void) printf("attempting rename %s to %s\n", name, newname); } err = lzc_rename(name, newname); if (err == 0) changelist_rename(clp, name, tryname); } else { err = ENOENT; } if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) { seq++; (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN, "%.*srecv-%u-%u", baselen, name, getpid(), seq); if (flags->verbose) { (void) printf("failed - trying rename %s to %s\n", name, newname); } err = lzc_rename(name, newname); if (err == 0) changelist_rename(clp, name, newname); if (err && flags->verbose) { (void) printf("failed (%u) - " "will try again on next pass\n", errno); } err = EAGAIN; } else if (flags->verbose) { if (err == 0) (void) printf("success\n"); else (void) printf("failed (%u)\n", errno); } (void) changelist_postfix(clp); changelist_free(clp); return (err); } static int recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, char *newname, recvflags_t *flags) { int err = 0; prop_changelist_t *clp; zfs_handle_t *zhp; boolean_t defer = B_FALSE; int spa_version; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags->force ? MS_FORCE : 0); if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS) defer = B_TRUE; zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); if (flags->verbose) (void) printf("attempting destroy %s\n", name); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { nvlist_t *nv = fnvlist_alloc(); fnvlist_add_boolean(nv, name); err = lzc_destroy_snaps(nv, defer, NULL); fnvlist_free(nv); } else { err = lzc_destroy(name); } if (err == 0) { if (flags->verbose) (void) printf("success\n"); changelist_remove(clp, name); } (void) changelist_postfix(clp); changelist_free(clp); /* * Deferred destroy might destroy the snapshot or only mark it to be * destroyed later, and it returns success in either case. 
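/*
 * A minimal version of the short-read handling in recv_read() above,
 * without the fletcher-4 side effect: keep calling read(2) until the
 * requested length is satisfied, and treat EOF or an error mid-record as
 * failure.  sketch_read_full() is an illustrative name.
 */
#include <unistd.h>

static int
sketch_read_full(int fd, void *buf, size_t len)
{
        char *cp = buf;

        while (len > 0) {
                ssize_t rv = read(fd, cp, len);

                if (rv <= 0)
                        return (-1);    /* EOF or error mid-record */
                cp += rv;
                len -= rv;
        }
        return (0);
}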
*/ if (err != 0 || (defer && zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT))) { err = recv_rename(hdl, name, NULL, baselen, newname, flags); } return (err); } typedef struct guid_to_name_data { uint64_t guid; boolean_t bookmark_ok; char *name; char *skip; } guid_to_name_data_t; static int guid_to_name_cb(zfs_handle_t *zhp, void *arg) { guid_to_name_data_t *gtnd = arg; const char *slash; int err; if (gtnd->skip != NULL && (slash = strrchr(zhp->zfs_name, '/')) != NULL && strcmp(slash + 1, gtnd->skip) == 0) { zfs_close(zhp); return (0); } if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); } err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); if (err != EEXIST && gtnd->bookmark_ok) err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } /* * Attempt to find the local dataset associated with this guid. In the case of * multiple matches, we attempt to find the "best" match by searching * progressively larger portions of the hierarchy. This allows one to send a * tree of datasets individually and guarantee that we will find the source * guid within that hierarchy, even if there are multiple matches elsewhere. */ static int guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, boolean_t bookmark_ok, char *name) { char pname[ZFS_MAX_DATASET_NAME_LEN]; guid_to_name_data_t gtnd; gtnd.guid = guid; gtnd.bookmark_ok = bookmark_ok; gtnd.name = name; gtnd.skip = NULL; /* * Search progressively larger portions of the hierarchy, starting * with the filesystem specified by 'parent'. This will * select the "most local" version of the origin snapshot in the case * that there are multiple matching snapshots in the system. */ (void) strlcpy(pname, parent, sizeof (pname)); char *cp = strrchr(pname, '@'); if (cp == NULL) cp = strchr(pname, '\0'); for (; cp != NULL; cp = strrchr(pname, '/')) { /* Chop off the last component and open the parent */ *cp = '\0'; zfs_handle_t *zhp = make_dataset_handle(hdl, pname); if (zhp == NULL) continue; int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd); if (err != EEXIST) err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd); if (err != EEXIST && bookmark_ok) err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd); zfs_close(zhp); if (err == EEXIST) return (0); /* * Remember the last portion of the dataset so we skip it next * time through (as we've already searched that portion of the * hierarchy). */ gtnd.skip = strrchr(pname, '/') + 1; } return (ENOENT); } /* * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if * guid1 is after guid2.
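/*
 * A sketch of guid_to_name()'s widening search above, reduced to the
 * string manipulation: strip one path component per pass and remember the
 * child just searched so it can be skipped at the next, broader level.
 * With "tank/a/b@snap" this visits tank/a/b, then tank/a (skipping b),
 * then tank (skipping a).  The function name and buffer size are
 * illustrative.
 */
#include <stdio.h>
#include <string.h>

static void
sketch_walk_ancestors(const char *dataset)
{
        char pname[256];
        const char *skip = NULL;
        char *cp;

        (void) snprintf(pname, sizeof (pname), "%s", dataset);

        /* Start at the snapshot's filesystem: drop a trailing "@snap". */
        cp = strrchr(pname, '@');
        if (cp == NULL)
                cp = strchr(pname, '\0');

        for (; cp != NULL; cp = strrchr(pname, '/')) {
                *cp = '\0';     /* chop the last component, widen the scope */
                printf("search under %s (skip child: %s)\n",
                    pname, skip != NULL ? skip : "none");
                /* ... a real search would iterate children here ... */
                char *slash = strrchr(pname, '/');
                skip = (slash != NULL) ? slash + 1 : NULL;
        }
}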
*/ static int created_before(libzfs_handle_t *hdl, avl_tree_t *avl, uint64_t guid1, uint64_t guid2) { nvlist_t *nvfs; char *fsname, *snapname; char buf[ZFS_MAX_DATASET_NAME_LEN]; int rv; zfs_handle_t *guid1hdl, *guid2hdl; uint64_t create1, create2; if (guid2 == 0) return (0); if (guid1 == 0) return (1); nvfs = fsavl_find(avl, guid1, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); if (guid1hdl == NULL) return (-1); nvfs = fsavl_find(avl, guid2, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); if (guid2hdl == NULL) { zfs_close(guid1hdl); return (-1); } create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG); create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG); if (create1 < create2) rv = -1; else if (create1 > create2) rv = +1; else rv = 0; zfs_close(guid1hdl); zfs_close(guid2hdl); return (rv); } static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, nvlist_t *renamed) { nvlist_t *local_nv, *deleted = NULL; avl_tree_t *local_avl; nvpair_t *fselem, *nextfselem; char *fromsnap; char newname[ZFS_MAX_DATASET_NAME_LEN]; char guidname[32]; int error; boolean_t needagain, progress, recursive; char *s1, *s2; VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (flags->dryrun) return (0); again: needagain = progress = B_FALSE; VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0)); if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, recursive, B_FALSE, &local_nv, &local_avl)) != 0) return (error); /* * Process deletes and renames */ for (fselem = nvlist_next_nvpair(local_nv, NULL); fselem; fselem = nextfselem) { nvlist_t *nvfs, *snaps; nvlist_t *stream_nvfs = NULL; nvpair_t *snapelem, *nextsnapelem; uint64_t fromguid = 0; uint64_t originguid = 0; uint64_t stream_originguid = 0; uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid; char *fsname, *stream_fsname; nextfselem = nvlist_next_nvpair(local_nv, fselem); VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap", &parent_fromsnap_guid)); (void) nvlist_lookup_uint64(nvfs, "origin", &originguid); /* * First find the stream's fs, so we can check for * a different origin (due to "zfs promote") */ for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { uint64_t thisguid; VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); if (stream_nvfs != NULL) break; } /* check for promote */ (void) nvlist_lookup_uint64(stream_nvfs, "origin", &stream_originguid); if (stream_nvfs && originguid != stream_originguid) { switch (created_before(hdl, local_avl, stream_originguid, originguid)) { case 1: { /* promote it! 
*/ zfs_cmd_t zc = { 0 }; nvlist_t *origin_nvfs; char *origin_fsname; if (flags->verbose) (void) printf("promoting %s\n", fsname); origin_nvfs = fsavl_find(local_avl, originguid, NULL); VERIFY(0 == nvlist_lookup_string(origin_nvfs, "name", &origin_fsname)); (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); if (error == 0) progress = B_TRUE; break; } default: break; case -1: fsavl_destroy(local_avl); nvlist_free(local_nv); return (-1); } /* * We had/have the wrong origin, therefore our * list of snapshots is wrong. Need to handle * them on the next pass. */ needagain = B_TRUE; continue; } for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nextsnapelem) { uint64_t thisguid; char *stream_snapname; nvlist_t *found, *props; nextsnapelem = nvlist_next_nvpair(snaps, snapelem); VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); found = fsavl_find(stream_avl, thisguid, &stream_snapname); /* check for delete */ if (found == NULL) { char name[ZFS_MAX_DATASET_NAME_LEN]; if (!flags->force) continue; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); error = recv_destroy(hdl, name, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; sprintf(guidname, "%" PRIu64, thisguid); nvlist_add_boolean(deleted, guidname); continue; } stream_nvfs = found; if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", &props) && 0 == nvlist_lookup_nvlist(props, stream_snapname, &props)) { zfs_cmd_t zc = { 0 }; zc.zc_cookie = B_TRUE; /* received */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", fsname, nvpair_name(snapelem)); if (zcmd_write_src_nvlist(hdl, &zc, props) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); zcmd_free_nvlists(&zc); } } /* check for different snapname */ if (strcmp(nvpair_name(snapelem), stream_snapname) != 0) { char name[ZFS_MAX_DATASET_NAME_LEN]; char tryname[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); (void) snprintf(tryname, sizeof (name), "%s@%s", fsname, stream_snapname); error = recv_rename(hdl, name, tryname, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; } if (strcmp(stream_snapname, fromsnap) == 0) fromguid = thisguid; } /* check for delete */ if (stream_nvfs == NULL) { if (!flags->force) continue; error = recv_destroy(hdl, fsname, strlen(tofs)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; sprintf(guidname, "%" PRIu64, parent_fromsnap_guid); nvlist_add_boolean(deleted, guidname); continue; } if (fromguid == 0) { if (flags->verbose) { (void) printf("local fs %s does not have " "fromsnap (%s in stream); must have " "been deleted locally; ignoring\n", fsname, fromsnap); } continue; } VERIFY(0 == nvlist_lookup_string(stream_nvfs, "name", &stream_fsname)); VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, "parentfromsnap", &stream_parent_fromsnap_guid)); s1 = strrchr(fsname, '/'); s2 = strrchr(stream_fsname, '/'); /* * Check if we're going to rename based on parent guid change * and the current parent guid was also deleted. If it was then * rename will fail and is likely unneeded, so avoid this and * force an early retry to determine the new * parent_fromsnap_guid. 
*/ if (stream_parent_fromsnap_guid != 0 && parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) { sprintf(guidname, "%" PRIu64, parent_fromsnap_guid); if (nvlist_exists(deleted, guidname)) { progress = B_TRUE; needagain = B_TRUE; goto doagain; } } /* * Check for rename. If the exact receive path is specified, it * does not count as a rename, but we still need to check the * datasets beneath it. */ if ((stream_parent_fromsnap_guid != 0 && parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) || ((flags->isprefix || strcmp(tofs, fsname) != 0) && (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; char tryname[ZFS_MAX_DATASET_NAME_LEN]; parent = fsavl_find(local_avl, stream_parent_fromsnap_guid, NULL); /* * NB: parent might not be found if we used the * tosnap for stream_parent_fromsnap_guid, * because the parent is a newly-created fs; * we'll be able to rename it after we recv the * new fs. */ if (parent != NULL) { char *pname; VERIFY(0 == nvlist_lookup_string(parent, "name", &pname)); (void) snprintf(tryname, sizeof (tryname), "%s%s", pname, strrchr(stream_fsname, '/')); } else { tryname[0] = '\0'; if (flags->verbose) { (void) printf("local fs %s new parent " "not found\n", fsname); } } newname[0] = '\0'; error = recv_rename(hdl, fsname, tryname, strlen(tofs)+1, newname, flags); if (renamed != NULL && newname[0] != '\0') { VERIFY(0 == nvlist_add_boolean(renamed, newname)); } if (error) needagain = B_TRUE; else progress = B_TRUE; } } doagain: fsavl_destroy(local_avl); nvlist_free(local_nv); nvlist_free(deleted); if (needagain && progress) { /* do another pass to fix up temporary names */ if (flags->verbose) (void) printf("another pass:\n"); goto again; } return (needagain); } static int zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { nvlist_t *stream_nv = NULL; avl_tree_t *stream_avl = NULL; char *fromsnap = NULL; char *sendsnap = NULL; char *cp; char tofs[ZFS_MAX_DATASET_NAME_LEN]; char sendfs[ZFS_MAX_DATASET_NAME_LEN]; char errbuf[1024]; dmu_replay_record_t drre; int error; boolean_t anyerr = B_FALSE; boolean_t softerr = B_FALSE; boolean_t recursive; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); assert(drr->drr_type == DRR_BEGIN); assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) == DMU_COMPOUNDSTREAM); /* * Read in the nvlist from the stream. */ if (drr->drr_payloadlen != 0) { error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, &stream_nv, flags->byteswap, zc); if (error) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } } recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (recursive && strchr(destname, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot stream")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } /* * Read in the end record and verify checksum. 
*/ if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), flags->byteswap, NULL))) goto out; if (flags->byteswap) { drre.drr_type = BSWAP_32(drre.drr_type); drre.drr_u.drr_end.drr_checksum.zc_word[0] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); drre.drr_u.drr_end.drr_checksum.zc_word[1] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]); drre.drr_u.drr_end.drr_checksum.zc_word[2] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]); drre.drr_u.drr_end.drr_checksum.zc_word[3] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]); } if (drre.drr_type != DRR_END) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incorrect header checksum")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap); if (drr->drr_payloadlen != 0) { nvlist_t *stream_fss; VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss)); if ((stream_avl = fsavl_create(stream_fss)) == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "couldn't allocate avl tree")); error = zfs_error(hdl, EZFS_NOMEM, errbuf); goto out; } if (fromsnap != NULL && recursive) { nvlist_t *renamed = NULL; nvpair_t *pair = NULL; (void) strlcpy(tofs, destname, sizeof (tofs)); if (flags->isprefix) { struct drr_begin *drrb = &drr->drr_u.drr_begin; int i; if (flags->istail) { cp = strrchr(drrb->drr_toname, '/'); if (cp == NULL) { (void) strlcat(tofs, "/", sizeof (tofs)); i = 0; } else { i = (cp - drrb->drr_toname); } } else { i = strcspn(drrb->drr_toname, "/@"); } /* zfs_receive_one() will create_parents() */ (void) strlcat(tofs, &drrb->drr_toname[i], sizeof (tofs)); *strchr(tofs, '@') = '\0'; } if (!flags->dryrun && !flags->nomount) { VERIFY(0 == nvlist_alloc(&renamed, NV_UNIQUE_NAME, 0)); } softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, renamed); /* Unmount renamed filesystems before receiving. */ while ((pair = nvlist_next_nvpair(renamed, pair)) != NULL) { zfs_handle_t *zhp; prop_changelist_t *clp = NULL; zhp = zfs_open(hdl, nvpair_name(pair), ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, 0); zfs_close(zhp); if (clp != NULL) { softerr |= changelist_prefix(clp); changelist_free(clp); } } } nvlist_free(renamed); } } /* * Get the fs specified by the first path in the stream (the top level * specified by 'zfs send') and pass it to each invocation of * zfs_receive_one(). */ (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, sizeof (sendfs)); if ((cp = strchr(sendfs, '@')) != NULL) { *cp = '\0'; /* * Find the "sendsnap", the final snapshot in a replication * stream. zfs_receive_one() handles certain errors * differently, depending on if the contained stream is the * last one or not. */ sendsnap = (cp + 1); } /* Finally, receive each contained stream */ do { /* * we should figure out if it has a recoverable * error, in which case do a recv_skip() and drive on. * Note, if we fail due to already having this guid, * zfs_receive_one() will take care of it (ie, * recv_skip() and return 0). */ error = zfs_receive_impl(hdl, destname, NULL, flags, fd, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep, sendsnap); if (error == ENODATA) { error = 0; break; } anyerr |= error; } while (error == 0); if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) { /* * Now that we have the fs's they sent us, try the * renames again. 
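/*
 * A sketch of the end-record verification above: if the stream came from
 * a sender of the other endianness, byteswap the four 64-bit checksum
 * words before comparing them with the checksum accumulated while reading.
 * The sketch_* names stand in for zio_cksum_t, BSWAP_64 and
 * ZIO_CHECKSUM_EQUAL.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct sketch_cksum {
        uint64_t word[4];
};

static uint32_t
sketch_bswap32(uint32_t x)
{
        return (((x & 0x000000ffU) << 24) | ((x & 0x0000ff00U) << 8) |
            ((x & 0x00ff0000U) >> 8) | ((x & 0xff000000U) >> 24));
}

static uint64_t
sketch_bswap64(uint64_t x)
{
        return (((uint64_t)sketch_bswap32((uint32_t)x) << 32) |
            sketch_bswap32((uint32_t)(x >> 32)));
}

static bool
sketch_cksum_ok(struct sketch_cksum *from_stream,
    const struct sketch_cksum *computed, bool byteswap)
{
        if (byteswap) {
                for (int i = 0; i < 4; i++)
                        from_stream->word[i] =
                            sketch_bswap64(from_stream->word[i]);
        }
        return (memcmp(from_stream, computed, sizeof (*computed)) == 0);
}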
*/ softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, NULL); } out: fsavl_destroy(stream_avl); nvlist_free(stream_nv); if (softerr) error = -2; if (anyerr) error = -1; return (error); } static void trunc_prop_errs(int truncated) { ASSERT(truncated != 0); if (truncated == 1) (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "1 more property could not be set\n")); else (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "%d more properties could not be set\n"), truncated); } static int recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive:")); /* XXX would be great to use lseek if possible... */ drr = buf; while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t), byteswap, NULL) == 0) { if (byteswap) drr->drr_type = BSWAP_32(drr->drr_type); switch (drr->drr_type) { case DRR_BEGIN: if (drr->drr_payloadlen != 0) { (void) recv_read(hdl, fd, buf, drr->drr_payloadlen, B_FALSE, NULL); } break; case DRR_END: free(buf); return (0); case DRR_OBJECT: if (byteswap) { drr->drr_u.drr_object.drr_bonuslen = BSWAP_32(drr->drr_u.drr_object. drr_bonuslen); } (void) recv_read(hdl, fd, buf, P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), B_FALSE, NULL); break; case DRR_WRITE: if (byteswap) { drr->drr_u.drr_write.drr_logical_size = BSWAP_64( drr->drr_u.drr_write.drr_logical_size); drr->drr_u.drr_write.drr_compressed_size = BSWAP_64( drr->drr_u.drr_write.drr_compressed_size); } uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write); (void) recv_read(hdl, fd, buf, payload_size, B_FALSE, NULL); break; case DRR_SPILL: if (byteswap) { drr->drr_u.drr_spill.drr_length = BSWAP_64(drr->drr_u.drr_spill.drr_length); } (void) recv_read(hdl, fd, buf, drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); break; case DRR_WRITE_EMBEDDED: if (byteswap) { drr->drr_u.drr_write_embedded.drr_psize = BSWAP_32(drr->drr_u.drr_write_embedded. drr_psize); } (void) recv_read(hdl, fd, buf, P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize, 8), B_FALSE, NULL); break; case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid record type")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } } free(buf); return (-1); } static void recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap, boolean_t resumable) { char target_fs[ZFS_MAX_DATASET_NAME_LEN]; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "checksum mismatch or incomplete stream")); if (!resumable) return; (void) strlcpy(target_fs, target_snap, sizeof (target_fs)); *strchr(target_fs, '@') = '\0'; zfs_handle_t *zhp = zfs_open(hdl, target_fs, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return; char token_buf[ZFS_MAXPROPLEN]; int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, token_buf, sizeof (token_buf), NULL, NULL, 0, B_TRUE); if (error == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "checksum mismatch or incomplete stream.\n" "Partially received snapshot is saved.\n" "A resuming stream can be generated on the sending " "system by running:\n" " zfs send -t %s"), token_buf); } zfs_close(zhp); } /* * Restores a backup of tosnap from the file descriptor specified by infd. 
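/*
 * A reduced version of the skipping loop in recv_skip() above: read each
 * fixed-size record header, discard any variable-length payload in bounded
 * chunks, and stop at the end record.  The header layout and type values
 * are illustrative, not the DMU replay record format.
 */
#include <stdint.h>
#include <unistd.h>

struct sketch_hdr {
        uint32_t type;          /* 0 = data, 1 = end */
        uint32_t payloadlen;
};

static int
sketch_skip_stream(int fd)
{
        struct sketch_hdr hdr;
        char scratch[4096];

        for (;;) {
                if (read(fd, &hdr, sizeof (hdr)) != (ssize_t)sizeof (hdr))
                        return (-1);    /* truncated stream */
                if (hdr.type == 1)
                        return (0);     /* end record: done */

                /* Discard the payload in bounded chunks. */
                uint32_t left = hdr.payloadlen;
                while (left > 0) {
                        size_t want = left < sizeof (scratch) ?
                            left : sizeof (scratch);
                        ssize_t rv = read(fd, scratch, want);

                        if (rv <= 0)
                                return (-1);
                        left -= rv;
                }
        }
}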
*/ static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr, dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep, const char *finalsnap) { zfs_cmd_t zc = { 0 }; time_t begin_time; int ioctl_err, ioctl_errno, err; char *cp; struct drr_begin *drrb = &drr->drr_u.drr_begin; char errbuf[1024]; char prop_errbuf[1024]; const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; zprop_errflags_t prop_errflags; boolean_t recursive; char *snapname = NULL; begin_time = time(NULL); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (stream_avl != NULL) { nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); nvlist_t *props; int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", &parent_snapguid); err = nvlist_lookup_nvlist(fs, "props", &props); if (err) VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); if (flags->canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } ret = zcmd_write_src_nvlist(hdl, &zc, props); if (err) nvlist_free(props); if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { VERIFY(0 == nvlist_lookup_nvlist(props, snapname, &snapprops_nvlist)); } if (ret != 0) return (-1); } cp = NULL; /* * Determine how much of the snapshot name stored in the stream * we are going to tack on to the name they specified on the * command line, and how much we are going to chop off. * * If they specified a snapshot, chop the entire name stored in * the stream. */ if (flags->istail) { /* * A filesystem was specified with -e. We want to tack on only * the tail of the sent snapshot path. */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -e")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strrchr(sendfs, '/'); if (chopprefix == NULL) { /* * The tail is the poolname, so we need to * prepend a path separator. */ int len = strlen(drrb->drr_toname); cp = malloc(len + 2); cp[0] = '/'; (void) strcpy(&cp[1], drrb->drr_toname); chopprefix = cp; } else { chopprefix = drrb->drr_toname + (chopprefix - sendfs); } } else if (flags->isprefix) { /* * A filesystem was specified with -d. We want to tack on * everything but the first element of the sent snapshot path * (all but the pool name). */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -d")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strchr(drrb->drr_toname, '/'); if (chopprefix == NULL) chopprefix = strchr(drrb->drr_toname, '@'); } else if (strchr(tosnap, '@') == NULL) { /* * If a filesystem was specified without -d or -e, we want to * tack on everything after the fs specified by 'zfs send'. */ chopprefix = drrb->drr_toname + strlen(sendfs); } else { /* A snapshot was specified as an exact path (no -d or -e). 
*/ if (recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot " "stream")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); } ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname); ASSERT(chopprefix > drrb->drr_toname); ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname)); ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' || chopprefix[0] == '\0'); /* * Determine name of destination snapshot, store in zc_value. */ (void) strcpy(zc.zc_value, tosnap); (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); #ifdef __FreeBSD__ if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) zfs_ioctl_version = get_zfs_ioctl_version(); /* * For forward compatibility hide tosnap in zc_value */ if (zfs_ioctl_version < ZFS_IOCVER_LZC) (void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap); #endif free(cp); if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* * Determine the name of the origin snapshot, store in zc_string. */ if (originsnap) { (void) strncpy(zc.zc_string, originsnap, sizeof (zc.zc_string)); if (flags->verbose) (void) printf("using provided clone origin %s\n", zc.zc_string); } else if (drrb->drr_flags & DRR_FLAG_CLONE) { if (guid_to_name(hdl, zc.zc_value, drrb->drr_fromguid, B_FALSE, zc.zc_string) != 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "local origin for clone %s does not exist"), zc.zc_value); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } if (flags->verbose) (void) printf("found clone origin %s\n", zc.zc_string); } boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & DMU_BACKUP_FEATURE_RESUMING; stream_wantsnewfs = (drrb->drr_fromguid == 0 || (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; if (stream_wantsnewfs) { /* * if the parent fs does not exist, look for it based on * the parent snap GUID */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive new filesystem stream")); (void) strcpy(zc.zc_name, zc.zc_value); cp = strrchr(zc.zc_name, '/'); if (cp) *cp = '\0'; if (cp && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char suffix[ZFS_MAX_DATASET_NAME_LEN]; (void) strcpy(suffix, strrchr(zc.zc_value, '/')); if (guid_to_name(hdl, zc.zc_name, parent_snapguid, B_FALSE, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, suffix); } } } else { /* * If the fs does not exist, look for it based on the * fromsnap GUID. */ if (resuming) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive resume stream")); } else { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive incremental stream")); } (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; /* * If the exact receive path was specified and this is the * topmost path in the stream, then if the fs does not exist we * should look no further. 
*/ if ((flags->isprefix || (*(chopprefix = drrb->drr_toname + strlen(sendfs)) != '\0' && *chopprefix != '@')) && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char snap[ZFS_MAX_DATASET_NAME_LEN]; (void) strcpy(snap, strchr(zc.zc_value, '@')); if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, B_FALSE, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, snap); } } } (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { zfs_handle_t *zhp; /* * Destination fs exists. It must be one of these cases: * - an incremental send stream * - the stream specifies a new fs (full stream or clone) * and they want us to blow away the existing fs (and * have therefore specified -F and removed any snapshots) * - we are resuming a failed receive. */ if (stream_wantsnewfs) { if (!flags->force) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' exists\n" "must specify -F to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination has snapshots (eg. %s)\n" "must destroy them to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } } if ((zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { zcmd_free_nvlists(&zc); return (-1); } if (stream_wantsnewfs && zhp->zfs_dmustats.dds_origin[0]) { zcmd_free_nvlists(&zc); zfs_close(zhp); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' is a clone\n" "must destroy it to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && stream_wantsnewfs) { /* We can't do online recv in this case */ clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); if (clp == NULL) { zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } if (changelist_prefix(clp) != 0) { changelist_free(clp); zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } } /* * If we are resuming a newfs, set newfs here so that we will * mount it if the recv succeeds this time. We can tell * that it was a newfs on the first recv because the fs * itself will be inconsistent (if the fs existed when we * did the first recv, we would have received it into * .../%recv). */ if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT)) newfs = B_TRUE; zfs_close(zhp); } else { /* * Destination filesystem does not exist. Therefore we better * be creating a new filesystem (either from a full backup, or * a clone). It would therefore be invalid if the user * specified only the pool name (i.e. if the destination name * contained no slash character). */ if (!stream_wantsnewfs || (cp = strrchr(zc.zc_name, '/')) == NULL) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' does not exist"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* * Trim off the final dataset component so we perform the * recvbackup ioctl to the filesystems's parent. 
*/ *cp = '\0'; if (flags->isprefix && !flags->istail && !flags->dryrun && create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); } newfs = B_TRUE; } zc.zc_begin_record = *drr_noswap; zc.zc_cookie = infd; zc.zc_guid = flags->force; zc.zc_resumable = flags->resumable; if (flags->verbose) { (void) printf("%s %s stream of %s into %s\n", flags->dryrun ? "would receive" : "receiving", drrb->drr_fromguid ? "incremental" : "full", drrb->drr_toname, zc.zc_value); (void) fflush(stdout); } if (flags->dryrun) { zcmd_free_nvlists(&zc); return (recv_skip(hdl, infd, flags->byteswap)); } zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; zc.zc_nvlist_dst_size = sizeof (prop_errbuf); zc.zc_cleanup_fd = cleanup_fd; zc.zc_action_handle = *action_handlep; err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); ioctl_errno = errno; prop_errflags = (zprop_errflags_t)zc.zc_obj; if (err == 0) { nvlist_t *prop_errors; VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, zc.zc_nvlist_dst_size, &prop_errors, 0)); nvpair_t *prop_err = NULL; while ((prop_err = nvlist_next_nvpair(prop_errors, prop_err)) != NULL) { char tbuf[1024]; zfs_prop_t prop; int intval; prop = zfs_name_to_prop(nvpair_name(prop_err)); (void) nvpair_value_int32(prop_err, &intval); if (strcmp(nvpair_name(prop_err), ZPROP_N_MORE_ERRORS) == 0) { trunc_prop_errs(intval); break; } else if (snapname == NULL || finalsnap == NULL || strcmp(finalsnap, snapname) == 0 || strcmp(nvpair_name(prop_err), zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) { /* * Skip the special case of, for example, * "refquota", errors on intermediate * snapshots leading up to a final one. * That's why we have all of the checks above. * * See zfs_ioctl.c's extract_delay_props() for * a list of props which can fail on * intermediate snapshots, but shouldn't * affect the overall receive. */ (void) snprintf(tbuf, sizeof (tbuf), dgettext(TEXT_DOMAIN, "cannot receive %s property on %s"), nvpair_name(prop_err), zc.zc_name); zfs_setprop_error(hdl, prop, intval, tbuf); } } nvlist_free(prop_errors); } zc.zc_nvlist_dst = 0; zc.zc_nvlist_dst_size = 0; zcmd_free_nvlists(&zc); if (err == 0 && snapprops_nvlist) { zfs_cmd_t zc2 = { 0 }; (void) strcpy(zc2.zc_name, zc.zc_value); zc2.zc_cookie = B_TRUE; /* received */ if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); zcmd_free_nvlists(&zc2); } } if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) { /* * It may be that this snapshot already exists, * in which case we want to consume & ignore it * rather than failing. */ avl_tree_t *local_avl; nvlist_t *local_nv, *fs; cp = strchr(zc.zc_value, '@'); /* * XXX Do this faster by just iterating over snaps in * this fs. Also if zc_value does not exist, we will * get a strange "does not exist" error message. 
*/ *cp = '\0'; if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); fsavl_destroy(local_avl); nvlist_free(local_nv); if (fs != NULL) { if (flags->verbose) { (void) printf("snap %s already exists; " "ignoring\n", zc.zc_value); } err = ioctl_err = recv_skip(hdl, infd, flags->byteswap); } } *cp = '@'; } if (ioctl_err != 0) { switch (ioctl_errno) { case ENODEV: cp = strchr(zc.zc_value, '@'); *cp = '\0'; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "most recent snapshot of %s does not\n" "match incremental source"), zc.zc_value); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); *cp = '@'; break; case ETXTBSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s has been modified\n" "since most recent snapshot"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; case EEXIST: cp = strchr(zc.zc_value, '@'); if (newfs) { /* it's the containing fs that exists */ *cp = '\0'; } zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination already exists")); (void) zfs_error_fmt(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN, "cannot restore to %s"), zc.zc_value); *cp = '@'; break; case EINVAL: (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: recv_ecksum_set_aux(hdl, zc.zc_value, flags->resumable); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to receive this stream.")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EDQUOT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s space quota exceeded"), zc.zc_name); (void) zfs_error(hdl, EZFS_NOSPC, errbuf); break; default: (void) zfs_standard_error(hdl, ioctl_errno, errbuf); } } /* * Mount the target filesystem (if created). Also mount any * children of the target filesystem if we did a replication * receive (indicated by stream_avl being non-NULL). */ cp = strchr(zc.zc_value, '@'); if (cp && (ioctl_err == 0 || !newfs)) { zfs_handle_t *h; *cp = '\0'; h = zfs_open(hdl, zc.zc_value, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { *cp = '@'; } else if (newfs || stream_avl) { /* * Track the first/top of hierarchy fs, * for mounting and sharing later. 
*/ if (top_zfs && *top_zfs == NULL) *top_zfs = zfs_strdup(hdl, zc.zc_value); } zfs_close(h); } *cp = '@'; } if (clp) { if (!flags->nomount) err |= changelist_postfix(clp); changelist_free(clp); } if (prop_errflags & ZPROP_ERR_NOCLEAR) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to clear unreceived properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (prop_errflags & ZPROP_ERR_NORESTORE) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to restore original properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (err || ioctl_err) return (-1); *action_handlep = zc.zc_action_handle; if (flags->verbose) { char buf1[64]; char buf2[64]; uint64_t bytes = zc.zc_cookie; time_t delta = time(NULL) - begin_time; if (delta == 0) delta = 1; zfs_nicenum(bytes, buf1, sizeof (buf1)); zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", buf1, delta, buf2); } return (0); } static int zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, const char *originsnap, recvflags_t *flags, int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep, const char *finalsnap) { int err; dmu_replay_record_t drr, drr_noswap; struct drr_begin *drrb = &drr.drr_u.drr_begin; char errbuf[1024]; zio_cksum_t zcksum = { 0 }; uint64_t featureflags; int hdrtype; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); if (flags->isprefix && !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " "(%s) does not exist"), tosnap); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } if (originsnap && !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs " "(%s) does not exist"), originsnap); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* read in the BEGIN record */ if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, &zcksum))) return (err); if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) { /* It's the double end record at the end of a package */ return (ENODATA); } /* the kernel needs the non-byteswapped begin record */ drr_noswap = drr; flags->byteswap = B_FALSE; if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { /* * We computed the checksum in the wrong byteorder in * recv_read() above; do it again correctly. 
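/*
 * A sketch of the transfer report printed above, keeping the delta == 0
 * guard so a sub-second receive does not divide by zero; the zfs_nicenum
 * formatting is replaced by plain byte counts and the name is illustrative.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static void
sketch_report_rate(uint64_t bytes, time_t begin)
{
        time_t delta = time(NULL) - begin;

        if (delta == 0)
                delta = 1;              /* avoid division by zero */
        printf("received %llu bytes in %ld seconds (%llu bytes/sec)\n",
            (unsigned long long)bytes, (long)delta,
            (unsigned long long)(bytes / delta));
}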
*/ bzero(&zcksum, sizeof (zio_cksum_t)); (void) fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); flags->byteswap = B_TRUE; drr.drr_type = BSWAP_32(drr.drr_type); drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); drrb->drr_magic = BSWAP_64(drrb->drr_magic); drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); drrb->drr_type = BSWAP_32(drrb->drr_type); drrb->drr_flags = BSWAP_32(drrb->drr_flags); drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad magic number)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo); if (!DMU_STREAM_SUPPORTED(featureflags) || (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "stream has unsupported feature, feature flags = %lx"), featureflags); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (strchr(drrb->drr_toname, '@') == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad snapshot name)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN]; if (sendfs == NULL) { /* * We were not called from zfs_receive_package(). Get * the fs specified by 'zfs send'. */ char *cp; (void) strlcpy(nonpackage_sendfs, drr.drr_u.drr_begin.drr_toname, sizeof (nonpackage_sendfs)); if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) *cp = '\0'; sendfs = nonpackage_sendfs; VERIFY(finalsnap == NULL); } return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags, &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep, finalsnap)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); } } /* * Restores a backup of tosnap from the file descriptor specified by infd. * Return 0 on total success, -2 if some things couldn't be * destroyed/renamed/promoted, -1 if some things couldn't be received. * (-1 will override -2, if -1 and the resumable flag was specified the * transfer can be resumed if the sending side supports it). 
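/*
 * A sketch of the byte-order handling above: if the magic number only
 * matches after swapping, the begin record came from a foreign-endian
 * sender and every multi-byte field must be swapped before use.
 * SKETCH_MAGIC and the header layout are illustrative, not
 * DMU_BACKUP_MAGIC or struct drr_begin.
 */
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_MAGIC    0x53454e44ULL   /* illustrative magic value */

struct sketch_begin {
        uint64_t magic;
        uint64_t toguid;
        uint32_t flags;
};

static uint32_t
sk_bswap32(uint32_t x)
{
        return (((x & 0x000000ffU) << 24) | ((x & 0x0000ff00U) << 8) |
            ((x & 0x00ff0000U) >> 8) | ((x & 0xff000000U) >> 24));
}

static uint64_t
sk_bswap64(uint64_t x)
{
        return (((uint64_t)sk_bswap32((uint32_t)x) << 32) |
            sk_bswap32((uint32_t)(x >> 32)));
}

static bool
sketch_normalize_begin(struct sketch_begin *b)
{
        if (b->magic == SKETCH_MAGIC)
                return (true);                  /* already native order */
        if (sk_bswap64(b->magic) != SKETCH_MAGIC)
                return (false);                 /* not a begin record */

        /* Foreign byte order: swap every field before using it. */
        b->magic = sk_bswap64(b->magic);
        b->toguid = sk_bswap64(b->toguid);
        b->flags = sk_bswap32(b->flags);
        return (true);
}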
*/ int zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props, recvflags_t *flags, int infd, avl_tree_t *stream_avl) { char *top_zfs = NULL; int err; int cleanup_fd; uint64_t action_handle = 0; char *originsnap = NULL; if (props) { err = nvlist_lookup_string(props, "origin", &originsnap); if (err && err != ENOENT) return (err); } cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); VERIFY(cleanup_fd >= 0); err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL, stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL); VERIFY(0 == close(cleanup_fd)); if (err == 0 && !flags->nomount && top_zfs) { zfs_handle_t *zhp; prop_changelist_t *clp; zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, CL_GATHER_MOUNT_ALWAYS, 0); zfs_close(zhp); if (clp != NULL) { /* mount and share received datasets */ err = changelist_postfix(clp); changelist_free(clp); } } if (zhp == NULL || clp == NULL || err) err = -1; } if (top_zfs) free(top_zfs); return (err); } Index: projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs =================================================================== --- projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs (revision 352536) +++ projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs (revision 352537) Property changes on: projects/clang900-import/cddl/contrib/opensolaris/lib/libzfs ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris/lib/libzfs:r352105-352536 Index: projects/clang900-import/cddl/contrib/opensolaris =================================================================== --- projects/clang900-import/cddl/contrib/opensolaris (revision 352536) +++ projects/clang900-import/cddl/contrib/opensolaris (revision 352537) Property changes on: projects/clang900-import/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris:r352308-352536 Index: projects/clang900-import/cddl =================================================================== --- projects/clang900-import/cddl (revision 352536) +++ projects/clang900-import/cddl (revision 352537) Property changes on: projects/clang900-import/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r352308-352536 Index: projects/clang900-import/contrib/jemalloc/src/jemalloc.c =================================================================== --- projects/clang900-import/contrib/jemalloc/src/jemalloc.c (revision 352536) +++ projects/clang900-import/contrib/jemalloc/src/jemalloc.c (revision 352537) @@ -1,3434 +1,3419 @@ #define JEMALLOC_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" 
/******************************************************************************/ /* Data. */ /* Work around : */ const char *__malloc_options_1_0 = NULL; __sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); /* Runtime configuration options. */ const char *je_malloc_conf #ifndef _WIN32 JEMALLOC_ATTR(weak) #endif ; bool opt_abort = #ifdef JEMALLOC_DEBUG true #else false #endif ; bool opt_abort_conf = #ifdef JEMALLOC_DEBUG true #else false #endif ; const char *opt_junk = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) "true" #else "false" #endif ; bool opt_junk_alloc = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; bool opt_junk_free = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; unsigned opt_narenas = 0; unsigned ncpus; /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. * * arenas[0..narenas_auto) are used for automatic multiplexing of threads and * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. * * Points to an arena_t. */ JEMALLOC_ALIGNED(CACHELINE) atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; static atomic_u_t narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ unsigned narenas_auto; /* Read-only after initialization. */ typedef enum { malloc_init_uninitialized = 3, malloc_init_a0_initialized = 2, malloc_init_recursible = 1, malloc_init_initialized = 0 /* Common case --> jnz. */ } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; /* False should be the common case. Set to true to trigger initialization. */ bool malloc_slow = true; /* When malloc_slow is true, set the corresponding bits for sanity check. */ enum { flag_opt_junk_alloc = (1U), flag_opt_junk_free = (1U << 1), flag_opt_zero = (1U << 2), flag_opt_utrace = (1U << 3), flag_opt_xmalloc = (1U << 4) }; static uint8_t malloc_slow_flags; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ # define NO_INITIALIZER ((unsigned long)0) # define INITIALIZER pthread_self() # define IS_INITIALIZER (malloc_initializer == pthread_self()) static pthread_t malloc_initializer = NO_INITIALIZER; #else # define NO_INITIALIZER false # define INITIALIZER true # define IS_INITIALIZER malloc_initializer static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. */ #ifdef _WIN32 #if _WIN32_WINNT >= 0x0600 static malloc_mutex_t init_lock = SRWLOCK_INIT; #else static malloc_mutex_t init_lock; static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI _init_init_lock(void) { /* * If another constructor in the same binary is using mallctl to e.g. * set up extent hooks, it may end up running before this one, and * malloc_init_hard will crash trying to lock the uninitialized lock. So * we force an initialization of the lock in malloc_init_hard as well. * We don't try to care about atomicity of the accessed to the * init_lock_initialized boolean, since it really only matters early in * the process creation, before any separate thread normally starts * doing anything. 
*/ if (!init_lock_initialized) { malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT, malloc_mutex_rank_exclusive); } init_lock_initialized = true; } #ifdef _MSC_VER # pragma section(".CRT$XCU", read) JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) static const void (WINAPI *init_init_lock)(void) = _init_init_lock; #endif #endif #else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #endif typedef struct { void *p; /* Input pointer (as in realloc(p, s)). */ size_t s; /* Request size. */ void *r; /* Result pointer. */ } malloc_utrace_t; #ifdef JEMALLOC_UTRACE # define UTRACE(a, b, c) do { \ if (unlikely(opt_utrace)) { \ int utrace_serrno = errno; \ malloc_utrace_t ut; \ ut.p = (a); \ ut.s = (b); \ ut.r = (c); \ utrace(&ut, sizeof(ut)); \ errno = utrace_serrno; \ } \ } while (0) #else # define UTRACE(a, b, c) #endif /* Whether encountered any invalid config options. */ static bool had_conf_error = false; /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to * definition. */ static bool malloc_init_hard_a0(void); static bool malloc_init_hard(void); /******************************************************************************/ /* * Begin miscellaneous support functions. */ bool malloc_initialized(void) { return (malloc_init_state == malloc_init_initialized); } JEMALLOC_ALWAYS_INLINE bool malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) { return malloc_init_hard_a0(); } return false; } JEMALLOC_ALWAYS_INLINE bool malloc_init(void) { if (unlikely(!malloc_initialized()) && malloc_init_hard()) { return true; } return false; } /* * The a0*() functions are used instead of i{d,}alloc() in situations that * cannot tolerate TLS variable access. */ static void * a0ialloc(size_t size, bool zero, bool is_internal) { if (unlikely(malloc_init_a0())) { return NULL; } return iallocztm(TSDN_NULL, size, sz_size2index(size), zero, NULL, is_internal, arena_get(TSDN_NULL, 0, true), true); } static void a0idalloc(void *ptr, bool is_internal) { idalloctm(TSDN_NULL, ptr, NULL, NULL, is_internal, true); } void * a0malloc(size_t size) { return a0ialloc(size, false, true); } void a0dalloc(void *ptr) { a0idalloc(ptr, true); } /* * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-senstive * situations that cannot tolerate TLS variable access (TLS allocation and very * early internal data structure initialization). */ void * bootstrap_malloc(size_t size) { if (unlikely(size == 0)) { size = 1; } return a0ialloc(size, false, false); } void * bootstrap_calloc(size_t num, size_t size) { size_t num_size; num_size = num * size; if (unlikely(num_size == 0)) { assert(num == 0 || size == 0); num_size = 1; } return a0ialloc(num_size, true, false); } void bootstrap_free(void *ptr) { if (unlikely(ptr == NULL)) { return; } a0idalloc(ptr, false); } void arena_set(unsigned ind, arena_t *arena) { atomic_store_p(&arenas[ind], arena, ATOMIC_RELEASE); } static void narenas_total_set(unsigned narenas) { atomic_store_u(&narenas_total, narenas, ATOMIC_RELEASE); } static void narenas_total_inc(void) { atomic_fetch_add_u(&narenas_total, 1, ATOMIC_RELEASE); } unsigned narenas_total_get(void) { return atomic_load_u(&narenas_total, ATOMIC_ACQUIRE); } /* Create a new arena and insert it into the arenas array at index ind. 
*/ static arena_t * arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; assert(ind <= narenas_total_get()); if (ind >= MALLOCX_ARENA_LIMIT) { return NULL; } if (ind == narenas_total_get()) { narenas_total_inc(); } /* * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ arena = arena_get(tsdn, ind, false); if (arena != NULL) { assert(ind < narenas_auto); return arena; } /* Actually initialize the arena. */ arena = arena_new(tsdn, ind, extent_hooks); return arena; } static void arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (ind == 0) { return; } if (have_background_thread) { bool err; malloc_mutex_lock(tsdn, &background_thread_lock); err = background_thread_create(tsdn_tsd(tsdn), ind); malloc_mutex_unlock(tsdn, &background_thread_lock); if (err) { malloc_printf(": error in background thread " "creation for arena %u. Abort.\n", ind); abort(); } } } arena_t * arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; malloc_mutex_lock(tsdn, &arenas_lock); arena = arena_init_locked(tsdn, ind, extent_hooks); malloc_mutex_unlock(tsdn, &arenas_lock); arena_new_create_background_thread(tsdn, ind); return arena; } static void arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); if (internal) { tsd_iarena_set(tsd, arena); } else { tsd_arena_set(tsd, arena); } } void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; oldarena = arena_get(tsd_tsdn(tsd), oldind, false); newarena = arena_get(tsd_tsdn(tsd), newind, false); arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); } static void arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_dec(arena, internal); if (internal) { tsd_iarena_set(tsd, NULL); } else { tsd_arena_set(tsd, NULL); } } arena_tdata_t * arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { arena_tdata_t *tdata, *arenas_tdata_old; arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); unsigned narenas_tdata_old, i; unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); unsigned narenas_actual = narenas_total_get(); /* * Dissociate old tdata array (and set up for deallocation upon return) * if it's too small. */ if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { arenas_tdata_old = arenas_tdata; narenas_tdata_old = narenas_tdata; arenas_tdata = NULL; narenas_tdata = 0; tsd_arenas_tdata_set(tsd, arenas_tdata); tsd_narenas_tdata_set(tsd, narenas_tdata); } else { arenas_tdata_old = NULL; narenas_tdata_old = 0; } /* Allocate tdata array if it's missing. */ if (arenas_tdata == NULL) { bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1; if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { *arenas_tdata_bypassp = true; arenas_tdata = (arena_tdata_t *)a0malloc( sizeof(arena_tdata_t) * narenas_tdata); *arenas_tdata_bypassp = false; } if (arenas_tdata == NULL) { tdata = NULL; goto label_return; } assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); tsd_arenas_tdata_set(tsd, arenas_tdata); tsd_narenas_tdata_set(tsd, narenas_tdata); } /* * Copy to tdata array. 
It's possible that the actual number of arenas * has increased since narenas_total_get() was called above, but that * causes no correctness issues unless two threads concurrently execute * the arenas.create mallctl, which we trust mallctl synchronization to * prevent. */ /* Copy/initialize tickers. */ for (i = 0; i < narenas_actual; i++) { if (i < narenas_tdata_old) { ticker_copy(&arenas_tdata[i].decay_ticker, &arenas_tdata_old[i].decay_ticker); } else { ticker_init(&arenas_tdata[i].decay_ticker, DECAY_NTICKS_PER_UPDATE); } } if (narenas_tdata > narenas_actual) { memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) * (narenas_tdata - narenas_actual)); } /* Read the refreshed tdata array. */ tdata = &arenas_tdata[ind]; label_return: if (arenas_tdata_old != NULL) { a0dalloc(arenas_tdata_old); } return tdata; } /* Slow path, called only by arena_choose(). */ arena_t * arena_choose_hard(tsd_t *tsd, bool internal) { arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)) { unsigned choose = percpu_arena_choose(); ret = arena_get(tsd_tsdn(tsd), choose, true); assert(ret != NULL); arena_bind(tsd, arena_ind_get(ret), false); arena_bind(tsd, arena_ind_get(ret), true); return ret; } if (narenas_auto > 1) { unsigned i, j, choose[2], first_null; bool is_new_arena[2]; /* * Determine binding for both non-internal and internal * allocation. * * choose[0]: For application allocation. * choose[1]: For internal metadata allocation. */ for (j = 0; j < 2; j++) { choose[j] = 0; is_new_arena[j] = false; } first_null = narenas_auto; malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock); assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL); for (i = 1; i < narenas_auto; i++) { if (arena_get(tsd_tsdn(tsd), i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. */ for (j = 0; j < 2; j++) { if (arena_nthreads_get(arena_get( tsd_tsdn(tsd), i, false), !!j) < arena_nthreads_get(arena_get( tsd_tsdn(tsd), choose[j], false), !!j)) { choose[j] = i; } } } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized * arena, in case all extant arenas are in use. * * NB: It is possible for there to be * discontinuities in terms of initialized * versus uninitialized arenas, due to the * "thread.arena" mallctl. */ first_null = i; } } for (j = 0; j < 2; j++) { if (arena_nthreads_get(arena_get(tsd_tsdn(tsd), choose[j], false), !!j) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded * arena if all arenas are already initialized. */ if (!!j == internal) { ret = arena_get(tsd_tsdn(tsd), choose[j], false); } } else { arena_t *arena; /* Initialize a new arena. 
*/ choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), choose[j], (extent_hooks_t *)&extent_hooks_default); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); return NULL; } is_new_arena[j] = true; if (!!j == internal) { ret = arena; } } arena_bind(tsd, choose[j], !!j); } malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); for (j = 0; j < 2; j++) { if (is_new_arena[j]) { assert(choose[j] > 0); arena_new_create_background_thread( tsd_tsdn(tsd), choose[j]); } } } else { ret = arena_get(tsd_tsdn(tsd), 0, false); arena_bind(tsd, 0, false); arena_bind(tsd, 0, true); } return ret; } void iarena_cleanup(tsd_t *tsd) { arena_t *iarena; iarena = tsd_iarena_get(tsd); if (iarena != NULL) { arena_unbind(tsd, arena_ind_get(iarena), true); } } void arena_cleanup(tsd_t *tsd) { arena_t *arena; arena = tsd_arena_get(tsd); if (arena != NULL) { arena_unbind(tsd, arena_ind_get(arena), false); } } void arenas_tdata_cleanup(tsd_t *tsd) { arena_tdata_t *arenas_tdata; /* Prevent tsd->arenas_tdata from being (re)created. */ *tsd_arenas_tdata_bypassp_get(tsd) = true; arenas_tdata = tsd_arenas_tdata_get(tsd); if (arenas_tdata != NULL) { tsd_arenas_tdata_set(tsd, NULL); a0dalloc(arenas_tdata); } } static void stats_print_atexit(void) { if (config_stats) { tsdn_t *tsdn; unsigned narenas, i; tsdn = tsdn_fetch(); /* * Merge stats from extant threads. This is racy, since * individual threads do not lock when recording tcache stats * events. As a consequence, the final stats may be slightly * out of date by the time they are reported, if other threads * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { tcache_t *tcache; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); ql_foreach(tcache, &arena->tcache_ql, link) { tcache_stats_merge(tsdn, tcache, arena); } malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } } je_malloc_stats_print(NULL, NULL, opt_stats_print_opts); } /* * Ensure that we don't hold any locks upon entry to or exit from allocator * code (in a "broad" sense that doesn't count a reentrant allocation as an * entrance or exit). */ JEMALLOC_ALWAYS_INLINE void check_entry_exit_locking(tsdn_t *tsdn) { if (!config_debug) { return; } if (tsdn_null(tsdn)) { return; } tsd_t *tsd = tsdn_tsd(tsdn); /* * It's possible we hold locks at entry/exit if we're in a nested * allocation. */ int8_t reentrancy_level = tsd_reentrancy_level_get(tsd); if (reentrancy_level != 0) { return; } witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); } /* * End miscellaneous support functions. */ /******************************************************************************/ /* * Begin initialization functions. */ static char * jemalloc_secure_getenv(const char *name) { #ifdef JEMALLOC_HAVE_SECURE_GETENV return secure_getenv(name); #else # ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) { return NULL; } # endif return getenv(name); #endif } static unsigned malloc_ncpus(void) { long result; #ifdef _WIN32 SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; #elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) /* * glibc >= 2.6 has the CPU_COUNT macro. * * glibc's sysconf() uses isspace(). glibc allocates for the first time * *before* setting up the isspace tables. Therefore we need a * different method to get the number of CPUs. 
*/ { cpu_set_t set; pthread_getaffinity_np(pthread_self(), sizeof(set), &set); result = CPU_COUNT(&set); } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif return ((result == -1) ? 1 : (unsigned)result); } static void init_opt_stats_print_opts(const char *v, size_t vlen) { size_t opts_len = strlen(opt_stats_print_opts); assert(opts_len <= stats_print_tot_num_options); for (size_t i = 0; i < vlen; i++) { switch (v[i]) { #define OPTION(o, v, d, s) case o: break; STATS_PRINT_OPTIONS #undef OPTION default: continue; } if (strchr(opt_stats_print_opts, v[i]) != NULL) { /* Ignore repeated. */ continue; } opt_stats_print_opts[opts_len++] = v[i]; opt_stats_print_opts[opts_len] = '\0'; assert(opts_len <= stats_print_tot_num_options); } assert(opts_len == strlen(opt_stats_print_opts)); } static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { bool accept; const char *opts = *opts_p; *k_p = opts; for (accept = false; !accept;) { switch (*opts) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '_': opts++; break; case ':': opts++; *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; *v_p = opts; accept = true; break; case '\0': if (opts != *opts_p) { malloc_write(": Conf string ends " "with key\n"); } return true; default: malloc_write(": Malformed conf string\n"); return true; } } for (accept = false; !accept;) { switch (*opts) { case ',': opts++; /* * Look ahead one character here, because the next time * this function is called, it will assume that end of * input has been cleanly reached if no input remains, * but we have optimistically already consumed the * comma if one exists. */ if (*opts == '\0') { malloc_write(": Conf string ends " "with comma\n"); } *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; accept = true; break; case '\0': *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; accept = true; break; default: opts++; break; } } *opts_p = opts; return false; } static void malloc_abort_invalid_conf(void) { assert(opt_abort_conf); malloc_printf(": Abort (abort_conf:true) on invalid conf " "value (see above).\n"); abort(); } static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); /* If abort_conf is set, error out after processing all options. */ had_conf_error = true; } static void malloc_slow_flag_init(void) { /* * Combine the runtime options into malloc_slow for fast path. Called * after processing all the options. */ malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) | (opt_junk_free ? flag_opt_junk_free : 0) | (opt_zero ? flag_opt_zero : 0) | (opt_utrace ? flag_opt_utrace : 0) | (opt_xmalloc ? flag_opt_xmalloc : 0); malloc_slow = (malloc_slow_flags != 0); } static void malloc_conf_init(void) { unsigned i; char buf[PATH_MAX + 1]; const char *opts, *k, *v; size_t klen, vlen; for (i = 0; i < 4; i++) { /* Get runtime configuration. 
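		 * The four passes below are processed in order: (0) the string
		 * baked in at configure time (config_malloc_conf), (1) the
		 * je_malloc_conf symbol linked into the program, (2) the name
		 * of the /etc/malloc.conf symbolic link, and (3) the
		 * MALLOC_CONF environment variable (JEMALLOC_CPREFIX-prefixed
		 * when a prefix is configured).  Each pass simply re-assigns
		 * the opt_* variables it recognizes, so when a key appears in
		 * several sources the later one generally wins; e.g. running
		 *
		 *	MALLOC_CONF="narenas:4,dirty_decay_ms:5000" ./prog
		 *
		 * overrides a compiled-in "narenas:1".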
*/ switch (i) { case 0: opts = config_malloc_conf; break; case 1: if (je_malloc_conf != NULL) { /* * Use options that were compiled into the * program. */ opts = je_malloc_conf; } else { /* No configuration specified. */ buf[0] = '\0'; opts = buf; } break; case 2: { ssize_t linklen = 0; #ifndef _WIN32 int saved_errno = errno; const char *linkname = # ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" # else "/etc/malloc.conf" # endif ; /* * Try to use the contents of the "/etc/malloc.conf" * symbolic link's name. */ linklen = readlink(linkname, buf, sizeof(buf) - 1); if (linklen == -1) { /* No configuration specified. */ linklen = 0; /* Restore errno. */ set_errno(saved_errno); } #endif buf[linklen] = '\0'; opts = buf; break; } case 3: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" #else "MALLOC_CONF" #endif ; if ((opts = jemalloc_secure_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment * variable. */ } else { /* No configuration specified. */ buf[0] = '\0'; opts = buf; } break; } default: not_reached(); buf[0] = '\0'; opts = buf; } while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { #define CONF_MATCH(n) \ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) #define CONF_MATCH_VALUE(n) \ (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0) #define CONF_HANDLE_BOOL(o, n) \ if (CONF_MATCH(n)) { \ if (CONF_MATCH_VALUE("true")) { \ o = true; \ } else if (CONF_MATCH_VALUE("false")) { \ o = false; \ } else { \ malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ } \ continue; \ } #define CONF_MIN_no(um, min) false #define CONF_MIN_yes(um, min) ((um) < (min)) #define CONF_MAX_no(um, max) false #define CONF_MAX_yes(um, max) ((um) > (max)) #define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ \ set_errno(0); \ um = malloc_strtoumax(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ if (CONF_MIN_##check_min(um, \ (t)(min))) { \ o = (t)(min); \ } else if ( \ CONF_MAX_##check_max(um, \ (t)(max))) { \ o = (t)(max); \ } else { \ o = (t)um; \ } \ } else { \ if (CONF_MIN_##check_min(um, \ (t)(min)) || \ CONF_MAX_##check_max(um, \ (t)(max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ } else { \ o = (t)um; \ } \ } \ continue; \ } #define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ clip) \ CONF_HANDLE_T_U(unsigned, o, n, min, max, \ check_min, check_max, clip) #define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ CONF_HANDLE_T_U(size_t, o, n, min, max, \ check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ char *end; \ \ set_errno(0); \ l = strtol(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ } else if (l < (ssize_t)(min) || l > \ (ssize_t)(max)) { \ malloc_conf_error( \ "Out-of-range conf value", \ k, klen, v, vlen); \ } else { \ o = l; \ } \ continue; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ if (CONF_MATCH(n)) { \ size_t cpylen = (vlen <= \ sizeof(o)-1) ? 
vlen : \ sizeof(o)-1; \ strncpy(o, v, cpylen); \ o[cpylen] = '\0'; \ continue; \ } CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") if (strncmp("metadata_thp", k, klen) == 0) { int i; bool match = false; for (i = 0; i < metadata_thp_mode_limit; i++) { if (strncmp(metadata_thp_mode_names[i], v, vlen) == 0) { opt_metadata_thp = i; match = true; break; } } if (!match) { malloc_conf_error("Invalid conf value", k, klen, v, vlen); } continue; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { int i; bool match = false; for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { if (extent_dss_prec_set(i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); } else { opt_dss = dss_prec_names[i]; match = true; break; } } } if (!match) { malloc_conf_error("Invalid conf value", k, klen, v, vlen); } continue; } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, UINT_MAX, yes, no, false) CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : SSIZE_MAX); CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : SSIZE_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { init_opt_stats_print_opts(v, vlen); continue; } if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { opt_junk = "true"; opt_junk_alloc = opt_junk_free = true; } else if (CONF_MATCH_VALUE("false")) { opt_junk = "false"; opt_junk_alloc = opt_junk_free = false; } else if (CONF_MATCH_VALUE("alloc")) { opt_junk = "alloc"; opt_junk_alloc = true; opt_junk_free = false; } else if (CONF_MATCH_VALUE("free")) { opt_junk = "free"; opt_junk_alloc = false; opt_junk_free = true; } else { malloc_conf_error( "Invalid conf value", k, klen, v, vlen); } continue; } CONF_HANDLE_BOOL(opt_zero, "zero") } if (config_utrace) { CONF_HANDLE_BOOL(opt_utrace, "utrace") } if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, (sizeof(size_t) << 3), yes, yes, false) CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; for (int i = percpu_arena_mode_names_base; i < percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { if (!have_percpu_arena) { malloc_conf_error( "No getcpu support", k, klen, v, vlen); } opt_percpu_arena = i; match = true; break; } } if (!match) { malloc_conf_error("Invalid conf value", k, klen, v, vlen); } continue; } CONF_HANDLE_BOOL(opt_background_thread, "background_thread"); CONF_HANDLE_SIZE_T(opt_max_background_threads, "max_background_threads", 1, opt_max_background_threads, yes, yes, true); if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, "prof_prefix", "jeprof") CONF_HANDLE_BOOL(opt_prof_active, "prof_active") CONF_HANDLE_BOOL(opt_prof_thread_active_init, "prof_thread_active_init") CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1, no, yes, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") 
CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } if (config_log) { if (CONF_MATCH("log")) { size_t cpylen = ( vlen <= sizeof(log_var_names) ? vlen : sizeof(log_var_names) - 1); strncpy(log_var_names, v, cpylen); log_var_names[cpylen] = '\0'; continue; } } if (CONF_MATCH("thp")) { bool match = false; for (int i = 0; i < thp_mode_names_limit; i++) { if (strncmp(thp_mode_names[i],v, vlen) == 0) { if (!have_madvise_huge) { malloc_conf_error( "No THP support", k, klen, v, vlen); } opt_thp = i; match = true; break; } } if (!match) { malloc_conf_error("Invalid conf value", k, klen, v, vlen); } continue; } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH #undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL #undef CONF_MIN_no #undef CONF_MIN_yes #undef CONF_MAX_no #undef CONF_MAX_yes #undef CONF_HANDLE_T_U #undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P } if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } } atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } static bool malloc_init_hard_needed(void) { if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == malloc_init_recursible)) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing * thread, and it is recursively allocating. */ return false; } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. */ spin_t spinner = SPIN_INITIALIZER; do { malloc_mutex_unlock(TSDN_NULL, &init_lock); spin_adaptive(&spinner); malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return false; } #endif return true; } static bool malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; if (config_prof) { prof_boot0(); } malloc_conf_init(); if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) { abort(); } } } if (pages_boot()) { return true; } if (base_boot(TSDN_NULL)) { return true; } if (extent_boot()) { return true; } if (ctl_boot()) { return true; } if (config_prof) { prof_boot1(); } arena_boot(); if (tcache_boot(TSDN_NULL)) { return true; } if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS, malloc_mutex_rank_exclusive)) { return true; } /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ narenas_auto = 1; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default) == NULL) { return true; } a0 = arena_get(TSDN_NULL, 0, false); malloc_init_state = malloc_init_a0_initialized; return false; } static bool malloc_init_hard_a0(void) { bool ret; malloc_mutex_lock(TSDN_NULL, &init_lock); ret = malloc_init_hard_a0_locked(); malloc_mutex_unlock(TSDN_NULL, &init_lock); return ret; } /* Initialize data structures which may trigger recursive allocation. */ static bool malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ !defined(__native_client__)) /* LinuxThreads' pthread_atfork() allocates. 
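	 * Because that allocation can recurse into the allocator, the
	 * handlers are registered here in malloc_init_hard_recursible(),
	 * after malloc_init_hard_a0() has set up arena 0 and
	 * malloc_init_state has been switched to malloc_init_recursible, so
	 * the nested request can be satisfied.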
*/ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) { abort(); } return true; } #endif if (background_thread_boot0()) { return true; } return false; } static unsigned malloc_narenas_default(void) { assert(ncpus > 0); /* * For SMP systems, create more than one arena per CPU by * default. */ if (ncpus > 1) { return ncpus << 2; } else { return 1; } } static percpu_arena_mode_t percpu_arena_as_initialized(percpu_arena_mode_t mode) { assert(!malloc_initialized()); assert(mode <= percpu_arena_disabled); if (mode != percpu_arena_disabled) { mode += percpu_arena_mode_enabled_base; } return mode; } static bool malloc_init_narenas(void) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; malloc_printf(": perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? opt_narenas : malloc_narenas_default()); if (opt_abort) { abort(); } } else { if (ncpus >= MALLOCX_ARENA_LIMIT) { malloc_printf(": narenas w/ percpu" "arena beyond limit (%d)\n", ncpus); if (opt_abort) { abort(); } return true; } /* NB: opt_percpu_arena isn't fully initialized yet. */ if (percpu_arena_as_initialized(opt_percpu_arena) == per_phycpu_arena && ncpus % 2 != 0) { malloc_printf(": invalid " "configuration -- per physical CPU arena " "with odd number (%u) of CPUs (no hyper " "threading?).\n", ncpus); if (opt_abort) abort(); } unsigned n = percpu_arena_ind_limit( percpu_arena_as_initialized(opt_percpu_arena)); if (opt_narenas < n) { /* * If narenas is specified with percpu_arena * enabled, actual narenas is set as the greater * of the two. percpu_arena_choose will be free * to use any of the arenas based on CPU * id. This is conservative (at a small cost) * but ensures correctness. * * If for some reason the ncpus determined at * boot is not the actual number (e.g. because * of affinity setting from numactl), reserving * narenas this way provides a workaround for * percpu_arena. */ opt_narenas = n; } } } if (opt_narenas == 0) { opt_narenas = malloc_narenas_default(); } assert(opt_narenas > 0); narenas_auto = opt_narenas; /* * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). 
*/ if (narenas_auto >= MALLOCX_ARENA_LIMIT) { narenas_auto = MALLOCX_ARENA_LIMIT - 1; malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } narenas_total_set(narenas_auto); return false; } static void malloc_init_percpu(void) { opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena); } static bool malloc_init_hard_finish(void) { if (malloc_mutex_boot()) { return true; } malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); return false; } static void malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) { malloc_mutex_assert_owner(tsdn, &init_lock); malloc_mutex_unlock(tsdn, &init_lock); if (reentrancy_set) { assert(!tsdn_null(tsdn)); tsd_t *tsd = tsdn_tsd(tsdn); assert(tsd_reentrancy_level_get(tsd) > 0); post_reentrancy(tsd); } } static bool malloc_init_hard(void) { tsd_t *tsd; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif malloc_mutex_lock(TSDN_NULL, &init_lock); #define UNLOCK_RETURN(tsdn, ret, reentrancy) \ malloc_init_hard_cleanup(tsdn, reentrancy); \ return ret; if (!malloc_init_hard_needed()) { UNLOCK_RETURN(TSDN_NULL, false, false) } if (malloc_init_state != malloc_init_a0_initialized && malloc_init_hard_a0_locked()) { UNLOCK_RETURN(TSDN_NULL, true, false) } malloc_mutex_unlock(TSDN_NULL, &init_lock); /* Recursive allocation relies on functional tsd. */ tsd = malloc_tsd_boot0(); if (tsd == NULL) { return true; } if (malloc_init_hard_recursible()) { return true; } malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); /* Set reentrancy level to 1 during init. */ pre_reentrancy(tsd, NULL); /* Initialize narenas before prof_boot2 (for allocation). */ if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } if (config_prof && prof_boot2(tsd)) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } malloc_init_percpu(); if (malloc_init_hard_finish()) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); witness_assert_lockless(witness_tsd_tsdn( tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. */ tsd = tsd_fetch(); if (opt_background_thread) { assert(have_background_thread); /* * Need to finish init & unlock first before creating background * threads (pthread_create depends on malloc). ctl_init (which * sets isthreaded) needs to be called without holding any lock. */ background_thread_ctl_init(tsd_tsdn(tsd)); malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); bool err = background_thread_create(tsd, 0); malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); if (err) { return true; } } #undef UNLOCK_RETURN return false; } /* * End initialization functions. */ /******************************************************************************/ /* * Begin allocation-path internal functions and data structures. */ /* * Settings determined by the documented behavior of the allocation functions. */ typedef struct static_opts_s static_opts_t; struct static_opts_s { /* Whether or not allocation size may overflow. */ bool may_overflow; /* Whether or not allocations of size 0 should be treated as size 1. */ bool bump_empty_alloc; /* * Whether to assert that allocations are not of size 0 (after any * bumping). */ bool assert_nonempty_alloc; /* * Whether or not to modify the 'result' argument to malloc in case of * error. */ bool null_out_result_on_error; /* Whether to set errno when we encounter an error condition. 
*/ bool set_errno_on_error; /* * The minimum valid alignment for functions requesting aligned storage. */ size_t min_alignment; /* The error string to use if we oom. */ const char *oom_string; /* The error string to use if the passed-in alignment is invalid. */ const char *invalid_alignment_string; /* * False if we're configured to skip some time-consuming operations. * * This isn't really a malloc "behavior", but it acts as a useful * summary of several other static (or at least, static after program * initialization) options. */ bool slow; }; JEMALLOC_ALWAYS_INLINE void static_opts_init(static_opts_t *static_opts) { static_opts->may_overflow = false; static_opts->bump_empty_alloc = false; static_opts->assert_nonempty_alloc = false; static_opts->null_out_result_on_error = false; static_opts->set_errno_on_error = false; static_opts->min_alignment = 0; static_opts->oom_string = ""; static_opts->invalid_alignment_string = ""; static_opts->slow = false; } /* * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we * should have one constant here per magic value there. Note however that the * representations need not be related. */ #define TCACHE_IND_NONE ((unsigned)-1) #define TCACHE_IND_AUTOMATIC ((unsigned)-2) #define ARENA_IND_AUTOMATIC ((unsigned)-1) typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { void **result; size_t num_items; size_t item_size; size_t alignment; bool zero; unsigned tcache_ind; unsigned arena_ind; }; JEMALLOC_ALWAYS_INLINE void dynamic_opts_init(dynamic_opts_t *dynamic_opts) { dynamic_opts->result = NULL; dynamic_opts->num_items = 0; dynamic_opts->item_size = 0; dynamic_opts->alignment = 0; dynamic_opts->zero = false; dynamic_opts->tcache_ind = TCACHE_IND_AUTOMATIC; dynamic_opts->arena_ind = ARENA_IND_AUTOMATIC; } /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t size, size_t usize, szind_t ind) { tcache_t *tcache; arena_t *arena; /* Fill in the tcache. */ if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) { if (likely(!sopts->slow)) { /* Getting tcache ptr unconditionally. */ tcache = tsd_tcachep_get(tsd); assert(tcache == tcache_get(tsd)); } else { tcache = tcache_get(tsd); } } else if (dopts->tcache_ind == TCACHE_IND_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, dopts->tcache_ind); } /* Fill in the arena. */ if (dopts->arena_ind == ARENA_IND_AUTOMATIC) { /* * In case of automatic arena management, we defer arena * computation until as late as we can, hoping to fill the * allocation out of the tcache. */ arena = NULL; } else { arena = arena_get(tsd_tsdn(tsd), dopts->arena_ind, true); } if (unlikely(dopts->alignment != 0)) { return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment, dopts->zero, tcache, arena); } return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false, arena, sopts->slow); } JEMALLOC_ALWAYS_INLINE void * imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t usize, szind_t ind) { void *ret; /* * For small allocations, sampling bumps the usize. If so, we allocate * from the ind_large bucket. */ szind_t ind_large; size_t bumped_usize = usize; if (usize <= SMALL_MAXCLASS) { assert(((dopts->alignment == 0) ? 
sz_s2u(LARGE_MINCLASS) : sz_sa2u(LARGE_MINCLASS, dopts->alignment)) == LARGE_MINCLASS); ind_large = sz_size2index(LARGE_MINCLASS); bumped_usize = sz_s2u(LARGE_MINCLASS); ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, bumped_usize, ind_large); if (unlikely(ret == NULL)) { return NULL; } arena_prof_promote(tsd_tsdn(tsd), ret, usize); } else { ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); } return ret; } /* * Returns true if the allocation will overflow, and false otherwise. Sets * *size to the product either way. */ JEMALLOC_ALWAYS_INLINE bool compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, size_t *size) { /* * This function is just num_items * item_size, except that we may have * to check for overflow. */ if (!may_overflow) { assert(dopts->num_items == 1); *size = dopts->item_size; return false; } /* A size_t with its high-half bits all set to 1. */ static const size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); *size = dopts->item_size * dopts->num_items; if (unlikely(*size == 0)) { return (dopts->num_items != 0 && dopts->item_size != 0); } /* * We got a non-zero size, but we don't know if we overflowed to get * there. To avoid having to do a divide, we'll be clever and note that * if both A and B can be represented in N/2 bits, then their product * can be represented in N bits (without the possibility of overflow). */ if (likely((high_bits & (dopts->num_items | dopts->item_size)) == 0)) { return false; } if (likely(*size / dopts->item_size == dopts->num_items)) { return false; } return true; } JEMALLOC_ALWAYS_INLINE int imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* Where the actual allocated memory will live. */ void *allocation = NULL; /* Filled in by compute_size_with_overflow below. */ size_t size = 0; /* * For unaligned allocations, we need only ind. For aligned * allocations, or in case of stats or profiling we need usize. * * These are actually dead stores, in that their values are reset before * any branch on their value is taken. Sometimes though, it's * convenient to pass them as arguments before this point. To avoid * undefined behavior then, we initialize them with dummy stores. */ szind_t ind = 0; size_t usize = 0; /* Reentrancy is only checked on slow path. */ int8_t reentrancy_level; /* Compute the amount of memory the user wants. */ if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts, &size))) { goto label_oom; } /* Validate the user input. */ if (sopts->bump_empty_alloc) { if (unlikely(size == 0)) { size = 1; } } if (sopts->assert_nonempty_alloc) { assert (size != 0); } if (unlikely(dopts->alignment < sopts->min_alignment || (dopts->alignment & (dopts->alignment - 1)) != 0)) { goto label_invalid_alignment; } /* This is the beginning of the "core" algorithm. */ if (dopts->alignment == 0) { ind = sz_size2index(size); if (unlikely(ind >= NSIZES)) { goto label_oom; } if (config_stats || (config_prof && opt_prof)) { usize = sz_index2size(ind); assert(usize > 0 && usize <= LARGE_MAXCLASS); } } else { usize = sz_sa2u(size, dopts->alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { goto label_oom; } } check_entry_exit_locking(tsd_tsdn(tsd)); /* * If we need to handle reentrancy, we can do it out of a * known-initialized arena (i.e. arena 0). */ reentrancy_level = tsd_reentrancy_level_get(tsd); if (sopts->slow && unlikely(reentrancy_level > 0)) { /* * We should never specify particular arenas or tcaches from * within our internal allocations. 
*/ assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC || dopts->tcache_ind == TCACHE_IND_NONE); assert(dopts->arena_ind == ARENA_IND_AUTOMATIC); dopts->tcache_ind = TCACHE_IND_NONE; /* We know that arena 0 has already been initialized. */ dopts->arena_ind = 0; } /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { /* * Note that if we're going down this path, usize must have been * initialized in the previous if statement. */ prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { alloc_ctx.slab = (usize <= SMALL_MAXCLASS); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { /* * Note that ind might still be 0 here. This is fine; * imalloc_sample ignores ind if dopts->alignment > 0. */ allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); alloc_ctx.slab = false; } else { allocation = NULL; } if (unlikely(allocation == NULL)) { prof_alloc_rollback(tsd, tctx, true); goto label_oom; } prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { /* * If dopts->alignment > 0, then ind is still 0, but usize was * computed in the previous if statement. Down the positive * alignment path, imalloc_no_sample ignores ind and size * (relying only on usize). */ allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { goto label_oom; } } /* * Allocation has been done at this point. We still have some * post-allocation work to do though. */ assert(dopts->alignment == 0 || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); if (config_stats) { assert(usize == isalloc(tsd_tsdn(tsd), allocation)); *tsd_thread_allocatedp_get(tsd) += usize; } if (sopts->slow) { UTRACE(0, size, allocation); } /* Success! */ check_entry_exit_locking(tsd_tsdn(tsd)); *dopts->result = allocation; return 0; label_oom: if (unlikely(sopts->slow) && config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(sopts->oom_string); abort(); } if (sopts->slow) { UTRACE(NULL, size, NULL); } check_entry_exit_locking(tsd_tsdn(tsd)); if (sopts->set_errno_on_error) { set_errno(ENOMEM); } if (sopts->null_out_result_on_error) { *dopts->result = NULL; } return ENOMEM; /* * This label is only jumped to by one goto; we move it out of line * anyways to avoid obscuring the non-error paths, and for symmetry with * the oom case. */ label_invalid_alignment: if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(sopts->invalid_alignment_string); abort(); } if (sopts->set_errno_on_error) { set_errno(EINVAL); } if (sopts->slow) { UTRACE(NULL, size, NULL); } check_entry_exit_locking(tsd_tsdn(tsd)); if (sopts->null_out_result_on_error) { *dopts->result = NULL; } return EINVAL; } /* Returns the errno-style error code of the allocation. */ JEMALLOC_ALWAYS_INLINE int imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { if (unlikely(!malloc_initialized()) && unlikely(malloc_init())) { if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(sopts->oom_string); abort(); } UTRACE(NULL, dopts->num_items * dopts->item_size, NULL); set_errno(ENOMEM); *dopts->result = NULL; return ENOMEM; } /* We always need the tsd. Let's grab it right away. */ tsd_t *tsd = tsd_fetch(); assert(tsd); if (likely(tsd_fast(tsd))) { /* Fast and common path. 
*/ tsd_assert_fast(tsd); sopts->slow = false; return imalloc_body(sopts, dopts, tsd); } else { sopts->slow = true; return imalloc_body(sopts, dopts, tsd); } } /******************************************************************************/ /* * Begin malloc(3)-compatible functions. */ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; LOG("core.malloc.entry", "size: %zu", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.oom_string = ": Error in malloc(): out of memory\n"; dopts.result = &ret; dopts.num_items = 1; dopts.item_size = size; imalloc(&sopts, &dopts); LOG("core.malloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { int ret; static_opts_t sopts; dynamic_opts_t dopts; LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " "size: %zu", memptr, alignment, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.bump_empty_alloc = true; sopts.min_alignment = sizeof(void *); sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; sopts.invalid_alignment_string = ": Error allocating aligned memory: invalid alignment\n"; dopts.result = memptr; dopts.num_items = 1; dopts.item_size = size; dopts.alignment = alignment; ret = imalloc(&sopts, &dopts); LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, *memptr); return ret; } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) je_aligned_alloc(size_t alignment, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.min_alignment = 1; sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; sopts.invalid_alignment_string = ": Error allocating aligned memory: invalid alignment\n"; dopts.result = &ret; dopts.num_items = 1; dopts.item_size = size; dopts.alignment = alignment; imalloc(&sopts, &dopts); LOG("core.aligned_alloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) je_calloc(size_t num, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.may_overflow = true; sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.oom_string = ": Error in calloc(): out of memory\n"; dopts.result = &ret; dopts.num_items = num; dopts.item_size = size; dopts.zero = true; imalloc(&sopts, &dopts); LOG("core.calloc.exit", "result: %p", ret); return ret; } static void * irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, prof_tctx_t *tctx) { void *p; if (tctx == NULL) { return NULL; } if (usize <= SMALL_MAXCLASS) { p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) { return NULL; } arena_prof_promote(tsd_tsdn(tsd), 
p, usize); } else { p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); } return p; } JEMALLOC_ALWAYS_INLINE void * irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, alloc_ctx_t *alloc_ctx) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); } else { p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return NULL; } prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); return p; } JEMALLOC_ALWAYS_INLINE void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { if (!slow_path) { tsd_assert_fast(tsd); } check_entry_exit_locking(tsd_tsdn(tsd)); if (tsd_reentrancy_level_get(tsd) != 0) { assert(slow_path); } assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != NSIZES); size_t usize; if (config_prof && opt_prof) { usize = sz_index2size(alloc_ctx.szind); prof_free(tsd, ptr, usize, &alloc_ctx); } else if (config_stats) { usize = sz_index2size(alloc_ctx.szind); } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } if (likely(!slow_path)) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, false); } else { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } } JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (!slow_path) { tsd_assert_fast(tsd); } check_entry_exit_locking(tsd_tsdn(tsd)); if (tsd_reentrancy_level_get(tsd) != 0) { assert(slow_path); } assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { /* * When cache_oblivious is disabled and ptr is not page aligned, * the allocation was not sampled -- usize can be used to * determine szind directly. 
*/ alloc_ctx.szind = sz_size2index(usize); alloc_ctx.slab = true; ctx = &alloc_ctx; if (config_debug) { alloc_ctx_t dbg_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, &dbg_ctx.slab); assert(dbg_ctx.szind == alloc_ctx.szind); assert(dbg_ctx.slab == alloc_ctx.slab); } } else if (config_prof && opt_prof) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind == sz_size2index(usize)); ctx = &alloc_ctx; } else { ctx = NULL; } if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, ctx); } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } if (likely(!slow_path)) { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); } else { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true); } } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { void *ret; tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (unlikely(size == 0)) { - if (ptr != NULL) { - /* realloc(ptr, 0) is equivalent to free(ptr). */ - UTRACE(ptr, 0, 0); - tcache_t *tcache; - tsd_t *tsd = tsd_fetch(); - if (tsd_reentrancy_level_get(tsd) == 0) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - ifree(tsd, ptr, tcache, true); - - LOG("core.realloc.exit", "result: %p", NULL); - return NULL; - } size = 1; } if (likely(ptr != NULL)) { assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { usize = sz_s2u(size); ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? NULL : irealloc_prof(tsd, ptr, old_usize, usize, &alloc_ctx); } else { if (config_stats) { usize = sz_s2u(size); } ret = iralloc(tsd, ptr, old_usize, size, 0, false); } tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). */ void *ret = je_malloc(size); LOG("core.realloc.exit", "result: %p", ret); return ret; } if (unlikely(ret == NULL)) { if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in realloc(): " "out of memory\n"); abort(); } set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { tsd_t *tsd; assert(usize == isalloc(tsdn, ret)); tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); LOG("core.realloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { LOG("core.free.entry", "ptr: %p", ptr); UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { /* * We avoid setting up tsd fully (e.g. tcache, arena binding) * based on only free() calls -- other activities trigger the * minimal to full transition. 
This is because free() may * happen during thread shutdown after tls deallocation: if a * thread never had any malloc activities until then, a * fully-setup tsd won't be destructed properly. */ tsd_t *tsd = tsd_fetch_min(); check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (likely(tsd_fast(tsd))) { tsd_assert_fast(tsd); /* Unconditionally get tcache ptr on fast path. */ tcache = tsd_tcachep_get(tsd); ifree(tsd, ptr, tcache, false); } else { if (likely(tsd_reentrancy_level_get(tsd) == 0)) { tcache = tcache_get(tsd); } else { tcache = NULL; } ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); } LOG("core.free.exit", ""); } /* * End malloc(3)-compatible functions. */ /******************************************************************************/ /* * Begin non-standard override functions. */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.bump_empty_alloc = true; sopts.min_alignment = 1; sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; sopts.invalid_alignment_string = ": Error allocating aligned memory: invalid alignment\n"; sopts.null_out_result_on_error = true; dopts.result = &ret; dopts.num_items = 1; dopts.item_size = size; dopts.alignment = alignment; imalloc(&sopts, &dopts); LOG("core.memalign.exit", "result: %p", ret); return ret; } #endif #ifdef JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) je_valloc(size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; LOG("core.valloc.entry", "size: %zu\n", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.min_alignment = PAGE; sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; sopts.invalid_alignment_string = ": Error allocating aligned memory: invalid alignment\n"; dopts.result = &ret; dopts.num_items = 1; dopts.item_size = size; dopts.alignment = PAGE; imalloc(&sopts, &dopts); LOG("core.valloc.exit", "result: %p\n", ret); return ret; } #endif #if defined(JEMALLOC_IS_MALLOC) && defined(JEMALLOC_GLIBC_MALLOC_HOOK) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). * * These definitions interpose hooks in glibc. The functions are actually * passed an extra argument for the caller return address, which will be * ignored. */ JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; # ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif # ifdef CPU_COUNT /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the * link. */ # define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) /* To force macro expansion of je_ prefix before stringification. 
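 * ALIAS() stringifies its argument with the # operator, and a macro argument
 * that appears next to # is not itself macro-expanded.  Going through the
 * extra PREALIAS() level expands je_calloc and friends (which may themselves
 * be renaming macros) before ALIAS() stringifies the result, so the alias
 * attribute ends up naming the real, possibly prefixed, symbol.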
*/ # define PREALIAS(je_fn) ALIAS(je_fn) # ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); # endif # ifdef JEMALLOC_OVERRIDE___LIBC_FREE void __libc_free(void* ptr) PREALIAS(je_free); # endif # ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC void *__libc_malloc(size_t size) PREALIAS(je_malloc); # endif # ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); # endif # ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); # endif # ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC void *__libc_valloc(size_t size) PREALIAS(je_valloc); # endif # ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); # endif # undef PREALIAS # undef ALIAS # endif #endif /* * End non-standard override functions. */ /******************************************************************************/ /* * Begin non-standard functions. */ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_mallocx(size_t size, int flags) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.assert_nonempty_alloc = true; sopts.null_out_result_on_error = true; sopts.oom_string = ": Error in mallocx(): out of memory\n"; dopts.result = &ret; dopts.num_items = 1; dopts.item_size = size; if (unlikely(flags != 0)) { if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); } dopts.zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { dopts.tcache_ind = TCACHE_IND_NONE; } else { dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); } } else { dopts.tcache_ind = TCACHE_IND_AUTOMATIC; } if ((flags & MALLOCX_ARENA_MASK) != 0) dopts.arena_ind = MALLOCX_ARENA_GET(flags); } imalloc(&sopts, &dopts); LOG("core.mallocx.exit", "result: %p", ret); return ret; } static void * irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { void *p; if (tctx == NULL) { return NULL; } if (usize <= SMALL_MAXCLASS) { p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS, alignment, zero, tcache, arena); if (p == NULL) { return NULL; } arena_prof_promote(tsdn, p, usize); } else { p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, tcache, arena); } return p; } JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, zero, tcache, arena); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, false); return NULL; } if (p == old_ptr && alignment != 0) { /* * The allocation did not move, so it is possible that the size * class is smaller than would 
guarantee the requested * alignment, and that the alignment constraint was * serendipitously satisfied. Additionally, old_usize may not * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ *usize = isalloc(tsd_tsdn(tsd), p); } prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, old_usize, old_tctx); return p; } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { void *p; tsd_t *tsd; size_t usize; size_t old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; arena_t *arena; tcache_t *tcache; LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena = arena_get(tsd_tsdn(tsd), arena_ind, true); if (unlikely(arena == NULL)) { goto label_oom; } } else { arena = NULL; } if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { tcache = tcache_get(tsd); } alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { usize = (alignment == 0) ? sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx); if (unlikely(p == NULL)) { goto label_oom; } } else { p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, zero, tcache, arena); if (unlikely(p == NULL)) { goto label_oom; } if (config_stats) { usize = isalloc(tsd_tsdn(tsd), p); } } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); if (config_stats) { *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in rallocx(): out of memory\n"); abort(); } UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } JEMALLOC_ALWAYS_INLINE size_t ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { size_t usize; if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) { return old_usize; } usize = isalloc(tsdn, ptr); return usize; } static size_t ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) { return old_usize; } usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, zero); return usize; } JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { size_t usize_max, usize; 
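	/*
	 * prof_alloc_prep() returns (prof_tctx_t *)(uintptr_t)1U as a sentinel
	 * meaning "do not sample this allocation"; any other value is a live
	 * tctx, so the comparisons against 1U here and in irallocx_prof()
	 * select between the sampled and plain (re)allocation paths.
	 */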
bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ if (alignment == 0) { usize_max = sz_s2u(size+extra); assert(usize_max > 0 && usize_max <= LARGE_MAXCLASS); } else { usize_max = sz_sa2u(size+extra, alignment); if (unlikely(usize_max == 0 || usize_max > LARGE_MAXCLASS)) { /* * usize_max is out of range, and chances are that * allocation will fail, but use the maximum possible * value and carry on with prof_alloc_prep(), just in * case allocation succeeds. */ usize_max = LARGE_MAXCLASS; } } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero, tctx); } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); return usize; } prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, old_tctx); return usize; } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + * extra) numerical overflow, but we may need to clamp extra to avoid * exceeding LARGE_MAXCLASS. * * Ordinarily, size limit checking is handled deeper down, but here we * have to check as part of (size + extra) clamping, since we need the * clamped value in the above helper functions. 
*/ if (unlikely(size > LARGE_MAXCLASS)) { usize = old_usize; goto label_not_resized; } if (unlikely(LARGE_MAXCLASS - size < extra)) { extra = LARGE_MAXCLASS - size; } if (config_prof && opt_prof) { usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero, &alloc_ctx); } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); } if (unlikely(usize == old_usize)) { goto label_not_resized; } if (config_stats) { *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.xallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, UNUSED int flags) { size_t usize; tsdn_t *tsdn; LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); if (config_debug || force_ivsalloc) { usize = ivsalloc(tsdn, ptr); assert(force_ivsalloc || usize != 0); } else { usize = isalloc(tsdn, ptr); } check_entry_exit_locking(tsdn); LOG("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. */ assert(tsd_reentrancy_level_get(tsd) == 0); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { if (likely(fast)) { tcache = tsd_tcachep_get(tsd); assert(tcache == tcache_get(tsd)); } else { if (likely(tsd_reentrancy_level_get(tsd) == 0)) { tcache = tcache_get(tsd); } else { tcache = NULL; } } } UTRACE(ptr, 0, 0); if (likely(fast)) { tsd_assert_fast(tsd); ifree(tsd, ptr, tcache, false); } else { ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t inallocx(tsdn_t *tsdn, size_t size, int flags) { check_entry_exit_locking(tsdn); size_t usize; if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { usize = sz_s2u(size); } else { usize = sz_sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); } check_entry_exit_locking(tsdn); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. 
*/ assert(tsd_reentrancy_level_get(tsd) == 0); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { if (likely(fast)) { tcache = tsd_tcachep_get(tsd); assert(tcache == tcache_get(tsd)); } else { if (likely(tsd_reentrancy_level_get(tsd) == 0)) { tcache = tcache_get(tsd); } else { tcache = NULL; } } } UTRACE(ptr, 0, 0); if (likely(fast)) { tsd_assert_fast(tsd); isfree(tsd, ptr, usize, tcache, false); } else { isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { size_t usize; tsdn_t *tsdn; assert(size != 0); if (unlikely(malloc_init())) { LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); LOG("core.nallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; tsd_t *tsd; LOG("core.mallctl.entry", "name: %s", name); if (unlikely(malloc_init())) { LOG("core.mallctl.exit", "result: %d", EAGAIN); return EAGAIN; } tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.mallctl.exit", "result: %d", ret); return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; LOG("core.mallctlnametomib.entry", "name: %s", name); if (unlikely(malloc_init())) { LOG("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } tsd_t *tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.mallctlnametomib.exit", "result: %d", ret); return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; tsd_t *tsd; LOG("core.mallctlbymib.entry", ""); if (unlikely(malloc_init())) { LOG("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); LOG("core.mallctlbymib.exit", "result: %d", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; LOG("core.malloc_stats_print.entry", ""); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); LOG("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); if (unlikely(ptr == NULL)) { ret = 0; } else { if (config_debug || force_ivsalloc) { ret = ivsalloc(tsdn, ptr); assert(force_ivsalloc || ret != 0); } else { ret = isalloc(tsdn, ptr); } } check_entry_exit_locking(tsdn); 
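	/*
	 * ivsalloc() validates ptr via the extent radix tree and returns 0 for
	 * pointers jemalloc does not own, whereas isalloc() assumes a valid
	 * allocation; hence the config_debug/force_ivsalloc branch above.
	 */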
LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } /* * End non-standard functions. */ /******************************************************************************/ /* * Begin compatibility functions. */ #define ALLOCM_LG_ALIGN(la) (la) #define ALLOCM_ALIGN(a) (ffsl(a)-1) #define ALLOCM_ZERO ((int)0x40) #define ALLOCM_NO_MOVE ((int)0x80) #define ALLOCM_SUCCESS 0 #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { assert(ptr != NULL); void *p = je_mallocx(size, flags); if (p == NULL) { return (ALLOCM_ERR_OOM); } if (rsize != NULL) { *rsize = isalloc(tsdn_fetch(), p); } *ptr = p; return ALLOCM_SUCCESS; } int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { assert(ptr != NULL); assert(*ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); int ret; bool no_move = flags & ALLOCM_NO_MOVE; if (no_move) { size_t usize = je_xallocx(*ptr, size, extra, flags); ret = (usize >= size) ? ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED; if (rsize != NULL) { *rsize = usize; } } else { void *p = je_rallocx(*ptr, size+extra, flags); if (p != NULL) { *ptr = p; ret = ALLOCM_SUCCESS; } else { ret = ALLOCM_ERR_OOM; } if (rsize != NULL) { *rsize = isalloc(tsdn_fetch(), *ptr); } } return ret; } int je_sallocm(const void *ptr, size_t *rsize, int flags) { assert(rsize != NULL); *rsize = je_sallocx(ptr, flags); return ALLOCM_SUCCESS; } int je_dallocm(void *ptr, int flags) { je_dallocx(ptr, flags); return ALLOCM_SUCCESS; } int je_nallocm(size_t *rsize, size_t size, int flags) { size_t usize = je_nallocx(size, flags); if (usize == 0) { return ALLOCM_ERR_OOM; } if (rsize != NULL) { *rsize = usize; } return ALLOCM_SUCCESS; } #undef ALLOCM_LG_ALIGN #undef ALLOCM_ALIGN #undef ALLOCM_ZERO #undef ALLOCM_NO_MOVE #undef ALLOCM_SUCCESS #undef ALLOCM_ERR_OOM #undef ALLOCM_ERR_NOT_MOVED /* * End compatibility functions. */ /******************************************************************************/ /* * The following functions are used by threading libraries for protection of * malloc during fork(). */ /* * If an application creates a thread before doing any allocation in the main * thread, then calls fork(2) in the main thread followed by memory allocation * in the child process, a race can occur that results in deadlock within the * child: the main thread may have forked while the created thread had * partially initialized the allocator. Ordinarily jemalloc prevents * fork/malloc races via the following functions it registers during * initialization using pthread_atfork(), but of course that does no good if * the allocator isn't fully initialized at fork time. The following library * constructor is a partial solution to this problem. It may still be possible * to trigger the deadlock described above, but doing so would involve forking * via a library constructor that runs before jemalloc's runs. */ #ifndef JEMALLOC_JET JEMALLOC_ATTR(constructor) static void jemalloc_constructor(void) { malloc_init(); } #endif #ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_prefork(void) #else JEMALLOC_EXPORT void _malloc_prefork(void) #endif { tsd_t *tsd; unsigned i, j, narenas; arena_t *arena; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) { return; } #endif assert(malloc_initialized()); tsd = tsd_fetch(); narenas = narenas_total_get(); witness_prefork(tsd_witness_tsdp_get(tsd)); /* Acquire all mutexes in a safe order. 
*/ ctl_prefork(tsd_tsdn(tsd)); tcache_prefork(tsd_tsdn(tsd)); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); if (have_background_thread) { background_thread_prefork0(tsd_tsdn(tsd)); } prof_prefork0(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_prefork1(tsd_tsdn(tsd)); } /* Break arena prefork into stages to preserve lock order. */ for (i = 0; i < 8; i++) { for (j = 0; j < narenas; j++) { if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != NULL) { switch (i) { case 0: arena_prefork0(tsd_tsdn(tsd), arena); break; case 1: arena_prefork1(tsd_tsdn(tsd), arena); break; case 2: arena_prefork2(tsd_tsdn(tsd), arena); break; case 3: arena_prefork3(tsd_tsdn(tsd), arena); break; case 4: arena_prefork4(tsd_tsdn(tsd), arena); break; case 5: arena_prefork5(tsd_tsdn(tsd), arena); break; case 6: arena_prefork6(tsd_tsdn(tsd), arena); break; case 7: arena_prefork7(tsd_tsdn(tsd), arena); break; default: not_reached(); } } } } prof_prefork1(tsd_tsdn(tsd)); } #ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_postfork_parent(void) #else JEMALLOC_EXPORT void _malloc_postfork(void) #endif { tsd_t *tsd; unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) { return; } #endif assert(malloc_initialized()); tsd = tsd_fetch(); witness_postfork_parent(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) { arena_postfork_parent(tsd_tsdn(tsd), arena); } } prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); } malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_parent(tsd_tsdn(tsd)); ctl_postfork_parent(tsd_tsdn(tsd)); } void jemalloc_postfork_child(void) { tsd_t *tsd; unsigned i, narenas; assert(malloc_initialized()); tsd = tsd_fetch(); witness_postfork_child(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) { arena_postfork_child(tsd_tsdn(tsd), arena); } } prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); } malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); } void _malloc_first_thread(void) { (void)malloc_mutex_first_thread(); } /******************************************************************************/ Index: projects/clang900-import/contrib/netbsd-tests/lib/libc/sys/t_stat.c =================================================================== --- projects/clang900-import/contrib/netbsd-tests/lib/libc/sys/t_stat.c (revision 352536) +++ projects/clang900-import/contrib/netbsd-tests/lib/libc/sys/t_stat.c (revision 352537) @@ -1,419 +1,422 @@ /* $NetBSD: t_stat.c,v 1.5 2017/01/13 20:06:50 christos Exp $ */ /*- * Copyright (c) 2011 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jukka Ruohonen. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __RCSID("$NetBSD: t_stat.c,v 1.5 2017/01/13 20:06:50 christos Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include #include static const char *path = "stat"; ATF_TC_WITH_CLEANUP(stat_chflags); ATF_TC_HEAD(stat_chflags, tc) { atf_tc_set_md_var(tc, "descr", "Test chflags(2) with stat(2)"); } ATF_TC_BODY(stat_chflags, tc) { struct stat sa, sb; int fd; (void)memset(&sa, 0, sizeof(struct stat)); (void)memset(&sb, 0, sizeof(struct stat)); fd = open(path, O_RDONLY | O_CREAT, 0600); ATF_REQUIRE(fd != -1); ATF_REQUIRE(stat(path, &sa) == 0); ATF_REQUIRE(chflags(path, UF_NODUMP) == 0); ATF_REQUIRE(stat(path, &sb) == 0); if (sa.st_flags == sb.st_flags) atf_tc_fail("stat(2) did not detect chflags(2)"); ATF_REQUIRE(close(fd) == 0); ATF_REQUIRE(unlink(path) == 0); } ATF_TC_CLEANUP(stat_chflags, tc) { (void)unlink(path); } ATF_TC(stat_dir); ATF_TC_HEAD(stat_dir, tc) { atf_tc_set_md_var(tc, "descr", "Test stat(2) with directories"); } ATF_TC_BODY(stat_dir, tc) { const short depth = 2; struct stat sa, sb; char *argv[2]; FTSENT *ftse; FTS *fts; int ops; argv[1] = NULL; argv[0] = __UNCONST("/"); ops = FTS_NOCHDIR; ops |= FTS_PHYSICAL; fts = fts_open(argv, ops, NULL); ATF_REQUIRE(fts != NULL); while ((ftse = fts_read(fts)) != NULL) { if (ftse->fts_level < 1) continue; if (ftse->fts_level > depth) { (void)fts_set(fts, ftse, FTS_SKIP); continue; } switch(ftse->fts_info) { case FTS_DP: (void)memset(&sa, 0, sizeof(struct stat)); (void)memset(&sb, 0, sizeof(struct stat)); ATF_REQUIRE(stat(ftse->fts_parent->fts_path,&sa) == 0); ATF_REQUIRE(chdir(ftse->fts_path) == 0); ATF_REQUIRE(stat(".", &sb) == 0); /* * The previous two stat(2) calls * should be for the same directory. */ if (sa.st_dev != sb.st_dev || sa.st_ino != sb.st_ino) atf_tc_fail("inconsistent stat(2)"); /* * Check that fts(3)'s stat(2) * call equals the manual one. 
*/ if (sb.st_ino != ftse->fts_statp->st_ino) atf_tc_fail("stat(2) and fts(3) differ"); break; default: break; } } (void)fts_close(fts); } ATF_TC(stat_err); ATF_TC_HEAD(stat_err, tc) { atf_tc_set_md_var(tc, "descr", "Test errors from the stat(2) family"); } ATF_TC_BODY(stat_err, tc) { char buf[NAME_MAX + 1]; struct stat st; (void)memset(buf, 'x', sizeof(buf)); errno = 0; ATF_REQUIRE_ERRNO(EBADF, fstat(-1, &st) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENAMETOOLONG, stat(buf, &st) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENAMETOOLONG, lstat(buf, &st) == -1); errno = 0; ATF_REQUIRE_ERRNO(EFAULT, stat((void *)-1, &st) == -1); errno = 0; ATF_REQUIRE_ERRNO(EFAULT, lstat((void *)-1, &st) == -1); errno = 0; ATF_REQUIRE_ERRNO(EFAULT, stat("/etc/passwd", (void *)-1) == -1); errno = 0; ATF_REQUIRE_ERRNO(EFAULT, lstat("/etc/passwd", (void *)-1) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENOENT, stat("/a/b/c/d/e/f/g/h/i/j/k", &st) == -1); errno = 0; ATF_REQUIRE_ERRNO(ENOENT, lstat("/a/b/c/d/e/f/g/h/i/j/k", &st) == -1); } ATF_TC_WITH_CLEANUP(stat_mtime); ATF_TC_HEAD(stat_mtime, tc) { atf_tc_set_md_var(tc, "descr", "Test modification times with stat(2)"); } ATF_TC_BODY(stat_mtime, tc) { struct stat sa, sb; int fd[3]; size_t i; for (i = 0; i < __arraycount(fd); i++) { (void)memset(&sa, 0, sizeof(struct stat)); (void)memset(&sb, 0, sizeof(struct stat)); fd[i] = open(path, O_WRONLY | O_CREAT, 0600); ATF_REQUIRE(fd[i] != -1); ATF_REQUIRE(write(fd[i], "X", 1) == 1); ATF_REQUIRE(stat(path, &sa) == 0); (void)sleep(1); ATF_REQUIRE(write(fd[i], "X", 1) == 1); ATF_REQUIRE(stat(path, &sb) == 0); ATF_REQUIRE(close(fd[i]) == 0); ATF_REQUIRE(unlink(path) == 0); if (sa.st_mtime == sb.st_mtime) atf_tc_fail("mtimes did not change"); } } ATF_TC_CLEANUP(stat_mtime, tc) { (void)unlink(path); } ATF_TC_WITH_CLEANUP(stat_perm); ATF_TC_HEAD(stat_perm, tc) { atf_tc_set_md_var(tc, "descr", "Test permissions with stat(2)"); atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(stat_perm, tc) { struct stat sa, sb; gid_t gid; uid_t uid; int fd; (void)memset(&sa, 0, sizeof(struct stat)); (void)memset(&sb, 0, sizeof(struct stat)); uid = getuid(); gid = getgid(); fd = open(path, O_RDONLY | O_CREAT, 0600); ATF_REQUIRE(fd != -1); ATF_REQUIRE(fstat(fd, &sa) == 0); ATF_REQUIRE(stat(path, &sb) == 0); if (gid != sa.st_gid || sa.st_gid != sb.st_gid) atf_tc_fail("invalid GID"); if (uid != sa.st_uid || sa.st_uid != sb.st_uid) atf_tc_fail("invalid UID"); ATF_REQUIRE(close(fd) == 0); ATF_REQUIRE(unlink(path) == 0); } ATF_TC_CLEANUP(stat_perm, tc) { (void)unlink(path); } ATF_TC_WITH_CLEANUP(stat_size); ATF_TC_HEAD(stat_size, tc) { atf_tc_set_md_var(tc, "descr", "Test file sizes with stat(2)"); } ATF_TC_BODY(stat_size, tc) { struct stat sa, sb, sc; const size_t n = 10; size_t i; int fd; fd = open(path, O_WRONLY | O_CREAT, 0600); ATF_REQUIRE(fd >= 0); for (i = 0; i < n; i++) { (void)memset(&sa, 0, sizeof(struct stat)); (void)memset(&sb, 0, sizeof(struct stat)); (void)memset(&sc, 0, sizeof(struct stat)); ATF_REQUIRE(fstat(fd, &sa) == 0); ATF_REQUIRE(write(fd, "X", 1) == 1); ATF_REQUIRE(fstat(fd, &sb) == 0); ATF_REQUIRE(stat(path, &sc) == 0); if (sa.st_size + 1 != sb.st_size) atf_tc_fail("invalid file size"); if (sb.st_size != sc.st_size) atf_tc_fail("stat(2) and fstat(2) mismatch"); } ATF_REQUIRE(close(fd) == 0); ATF_REQUIRE(unlink(path) == 0); } ATF_TC_CLEANUP(stat_size, tc) { (void)unlink(path); } ATF_TC(stat_socket); ATF_TC_HEAD(stat_socket, tc) { atf_tc_set_md_var(tc, "descr", "Test fstat(2) with " "a socket (PR kern/46077)"); } ATF_TC_BODY(stat_socket, 
tc) { struct sockaddr_in addr; struct stat st; uint32_t iaddr; int fd, flags; + if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) + atf_tc_skip("https://bugs.freebsd.org/240621"); + (void)memset(&st, 0, sizeof(struct stat)); (void)memset(&addr, 0, sizeof(struct sockaddr_in)); fd = socket(AF_INET, SOCK_STREAM, 0); ATF_REQUIRE(fd >= 0); flags = fcntl(fd, F_GETFL); ATF_REQUIRE(flags != -1); ATF_REQUIRE(fcntl(fd, F_SETFL, flags | O_NONBLOCK) != -1); ATF_REQUIRE(inet_pton(AF_INET, "127.0.0.1", &iaddr) == 1); addr.sin_port = htons(42); addr.sin_family = AF_INET; addr.sin_addr.s_addr = iaddr; errno = 0; ATF_REQUIRE_ERRNO(EINPROGRESS, connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in)) == -1); errno = 0; if (fstat(fd, &st) != 0 || errno != 0) atf_tc_fail("fstat(2) failed for a EINPROGRESS socket"); (void)close(fd); } ATF_TC_WITH_CLEANUP(stat_symlink); ATF_TC_HEAD(stat_symlink, tc) { atf_tc_set_md_var(tc, "descr", "Test symbolic links with stat(2)"); } ATF_TC_BODY(stat_symlink, tc) { const char *pathlink = "pathlink"; struct stat sa, sb; int fd; (void)memset(&sa, 0, sizeof(struct stat)); (void)memset(&sb, 0, sizeof(struct stat)); fd = open(path, O_WRONLY | O_CREAT, 0600); ATF_REQUIRE(fd >= 0); ATF_REQUIRE(symlink(path, pathlink) == 0); ATF_REQUIRE(stat(pathlink, &sa) == 0); ATF_REQUIRE(lstat(pathlink, &sb) == 0); if (S_ISLNK(sa.st_mode) != 0) atf_tc_fail("stat(2) detected symbolic link"); if (S_ISLNK(sb.st_mode) == 0) atf_tc_fail("lstat(2) did not detect symbolic link"); if (sa.st_mode == sb.st_mode) atf_tc_fail("inconsistencies between stat(2) and lstat(2)"); (void)close(fd); ATF_REQUIRE(unlink(path) == 0); ATF_REQUIRE(unlink(pathlink) == 0); } ATF_TC_CLEANUP(stat_symlink, tc) { (void)unlink(path); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, stat_chflags); ATF_TP_ADD_TC(tp, stat_dir); ATF_TP_ADD_TC(tp, stat_err); ATF_TP_ADD_TC(tp, stat_mtime); ATF_TP_ADD_TC(tp, stat_perm); ATF_TP_ADD_TC(tp, stat_size); ATF_TP_ADD_TC(tp, stat_socket); ATF_TP_ADD_TC(tp, stat_symlink); return atf_no_error(); } Index: projects/clang900-import/contrib/netbsd-tests =================================================================== --- projects/clang900-import/contrib/netbsd-tests (revision 352536) +++ projects/clang900-import/contrib/netbsd-tests (revision 352537) Property changes on: projects/clang900-import/contrib/netbsd-tests ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/netbsd-tests:r352308-352536 Index: projects/clang900-import/lib/Makefile =================================================================== --- projects/clang900-import/lib/Makefile (revision 352536) +++ projects/clang900-import/lib/Makefile (revision 352537) @@ -1,215 +1,215 @@ # @(#)Makefile 8.1 (Berkeley) 6/4/93 # $FreeBSD$ .include # The SUBDIR_BOOTSTRAP list is a small set of libraries which are used by many # of the other libraries. These are built first with a .WAIT between them # and the main list to avoid needing a SUBDIR_DEPEND line on every library # naming just these few items. SUBDIR_BOOTSTRAP= \ csu \ .WAIT \ libc \ libc_nonshared \ libcompiler_rt \ ${_libclang_rt} \ ${_libcplusplus} \ ${_libcxxrt} \ libelf \ msun # The main list; please keep these sorted alphabetically. 
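# The .WAIT entries only matter for parallel builds: with SUBDIR_PARALLEL set
# (see the end of this file), they force the bootstrap set to finish before
# the remaining subdirectories start building.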
SUBDIR= ${SUBDIR_BOOTSTRAP} \ .WAIT \ geom \ libalias \ libarchive \ libauditd \ libbegemot \ libblocksruntime \ libbsdstat \ libbsm \ libbz2 \ libcalendar \ libcam \ libcapsicum \ libcasper \ libcompat \ libcrypt \ libdevctl \ libdevinfo \ libdevstat \ libdl \ libdwarf \ libedit \ libelftc \ libevent \ libexecinfo \ libexpat \ libfetch \ libfigpar \ libgeom \ libifconfig \ libipsec \ libjail \ libkiconv \ libkvm \ liblzma \ libmemstat \ libmd \ libmt \ lib80211 \ libnetbsd \ libnv \ libopenbsd \ libopie \ libpam \ libpathconv \ libpcap \ libpjdlog \ ${_libproc} \ libprocstat \ libregex \ librpcsvc \ librss \ librt \ ${_librtld_db} \ libsbuf \ libsmb \ libsqlite3 \ libstdbuf \ libstdthreads \ libsysdecode \ libtacplus \ libthread_db \ libucl \ libufs \ libugidfw \ libulog \ libutil \ ${_libvgl} \ libwrap \ libxo \ liby \ libz \ libzstd \ ncurses # Inter-library dependencies. When the makefile for a library contains LDADD # libraries, those libraries should be listed as build order dependencies here. SUBDIR_DEPEND_geom= libufs -SUBDIR_DEPEND_libarchive= libz libbz2 libexpat liblzma libmd +SUBDIR_DEPEND_libarchive= libz libbz2 libexpat liblzma libmd libzstd SUBDIR_DEPEND_libauditdm= libbsm SUBDIR_DEPEND_libbsnmp= ${_libnetgraph} SUBDIR_DEPEND_libc++:= libcxxrt SUBDIR_DEPEND_libc= libcompiler_rt SUBDIR_DEPEND_libcam= libsbuf SUBDIR_DEPEND_libcasper= libnv SUBDIR_DEPEND_libdevstat= libkvm SUBDIR_DEPEND_libdpv= libfigpar ncurses libutil SUBDIR_DEPEND_libedit= ncurses SUBDIR_DEPEND_libgeom= libexpat libsbuf SUBDIR_DEPEND_librpcsec_gss= libgssapi SUBDIR_DEPEND_libmagic= libz SUBDIR_DEPEND_libmemstat= libkvm SUBDIR_DEPEND_libopie= libmd SUBDIR_DEPEND_libpam= libcrypt libopie ${_libradius} librpcsvc libtacplus libutil ${_libypclnt} ${_libcom_err} SUBDIR_DEPEND_libpjdlog= libutil SUBDIR_DEPEND_libprocstat= libkvm libutil SUBDIR_DEPEND_libradius= libmd SUBDIR_DEPEND_libsmb= libkiconv SUBDIR_DEPEND_libtacplus= libmd SUBDIR_DEPEND_libulog= libmd SUBDIR_DEPEND_libunbound= ${_libldns} SUBDIR_DEPEND_liblzma= ${_libthr} .if ${MK_OFED} != "no" SUBDIR_DEPEND_libpcap= ofed .endif # NB: keep these sorted by MK_* knobs SUBDIR.${MK_ATM}+= libngatm SUBDIR.${MK_BEARSSL}+= libbearssl libsecureboot SUBDIR.${MK_BLACKLIST}+=libblacklist SUBDIR.${MK_BLUETOOTH}+=libbluetooth libsdp SUBDIR.${MK_BSNMP}+= libbsnmp .if !defined(COMPAT_32BIT) && !defined(COMPAT_SOFTFP) SUBDIR.${MK_CLANG}+= clang .endif SUBDIR.${MK_CUSE}+= libcuse SUBDIR.${MK_CXX}+= libdevdctl SUBDIR.${MK_TOOLCHAIN}+=libpe SUBDIR.${MK_DIALOG}+= libdpv SUBDIR.${MK_FILE}+= libmagic SUBDIR.${MK_GPIO}+= libgpio SUBDIR.${MK_GSSAPI}+= libgssapi librpcsec_gss SUBDIR.${MK_ICONV}+= libiconv_modules SUBDIR.${MK_KERBEROS_SUPPORT}+= libcom_err SUBDIR.${MK_LDNS}+= libldns # The libraries under libclang_rt can only be built by clang, and only make # sense to build when clang is enabled at all. Furthermore, they can only be # built for certain architectures. 
.if ${MK_CLANG} != "no" && ${COMPILER_TYPE} == "clang" && \ (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "i386") _libclang_rt= libclang_rt .endif .if ${MK_LIBCPLUSPLUS} != "no" _libcxxrt= libcxxrt _libcplusplus= libc++ _libcplusplus+= libc++experimental .endif SUBDIR.${MK_EFI}+= libefivar SUBDIR.${MK_GOOGLETEST}+= googletest SUBDIR.${MK_LIBTHR}+= libthr SUBDIR.${MK_LLVM_LIBUNWIND}+= libgcc_eh SUBDIR.${MK_LLVM_LIBUNWIND}+= libgcc_s SUBDIR.${MK_NETGRAPH}+= libnetgraph SUBDIR.${MK_NIS}+= libypclnt .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _libvgl= libvgl .endif .if ${MACHINE_CPUARCH} == "aarch64" SUBDIR.${MK_PMC}+= libopencsd .endif .if ${MACHINE_CPUARCH} == "amd64" SUBDIR.${MK_PMC}+= libipt SUBDIR.${MK_BHYVE}+= libvmmapi .endif .if ${MACHINE_CPUARCH} != "sparc64" _libproc= libproc _librtld_db= librtld_db .endif SUBDIR.${MK_OPENMP}+= libomp SUBDIR.${MK_OPENSSL}+= libmp SUBDIR.${MK_PMC}+= libpmc libpmcstat SUBDIR.${MK_RADIUS_SUPPORT}+= libradius SUBDIR.${MK_SENDMAIL}+= libmilter libsm libsmdb libsmutil SUBDIR.${MK_TELNET}+= libtelnet SUBDIR.${MK_TESTS_SUPPORT}+= atf SUBDIR.${MK_TESTS}+= tests SUBDIR.${MK_UNBOUND}+= libunbound SUBDIR.${MK_USB}+= libusbhid libusb SUBDIR.${MK_OFED}+= ofed SUBDIR.${MK_VERIEXEC}+= libveriexec SUBDIR.${MK_ZFS}+= libbe .if !make(install) SUBDIR_PARALLEL= .endif .include Index: projects/clang900-import/lib/libarchive/Makefile =================================================================== --- projects/clang900-import/lib/libarchive/Makefile (revision 352536) +++ projects/clang900-import/lib/libarchive/Makefile (revision 352537) @@ -1,426 +1,427 @@ # $FreeBSD$ .include PACKAGE=lib${LIB} _LIBARCHIVEDIR= ${SRCTOP}/contrib/libarchive LIB= archive -LIBADD= z bz2 lzma bsdxml -CFLAGS+= -DHAVE_BZLIB_H=1 -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1 +LIBADD= z bz2 lzma bsdxml zstd +CFLAGS+= -DHAVE_BZLIB_H=1 -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1 -DHAVE_ZSTD_H=1 -DHAVE_LIBZSTD=1 # FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system. # It has no real relation to the libarchive version number. SHLIB_MAJOR= 7 CFLAGS+= -DPLATFORM_CONFIG_H=\"${.CURDIR}/config_freebsd.h\" CFLAGS+= -I${.OBJDIR} +CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib .if ${MK_OPENSSL} != "no" CFLAGS+= -DWITH_OPENSSL LIBADD+= crypto .else LIBADD+= md .endif .if ${MK_ICONV} != "no" # TODO: This can be changed back to CFLAGS once iconv works correctly # with statically linked binaries. SHARED_CFLAGS+= -DHAVE_ICONV=1 -DHAVE_ICONV_H=1 -DICONV_CONST= .endif .if ${MACHINE_ARCH:Marm*} != "" || ${MACHINE_ARCH:Mmips*} != "" || \ ${MACHINE_ARCH:Msparc64*} != "" || ${MACHINE_ARCH:Mpowerpc*} != "" NO_WCAST_ALIGN= yes .if ${MACHINE_ARCH:M*64*} == "" CFLAGS+= -DPPMD_32BIT .endif .endif NO_WCAST_ALIGN.clang= .PATH: ${_LIBARCHIVEDIR}/libarchive # Headers to be installed in /usr/include INCS= archive.h archive_entry.h # Sources to be compiled. 
SRCS= archive_acl.c \ archive_blake2sp_ref.c \ archive_blake2s_ref.c \ archive_check_magic.c \ archive_cmdline.c \ archive_cryptor.c \ archive_disk_acl_freebsd.c \ archive_digest.c \ archive_entry.c \ archive_entry_copy_stat.c \ archive_entry_link_resolver.c \ archive_entry_sparse.c \ archive_entry_stat.c \ archive_entry_strmode.c \ archive_entry_xattr.c \ archive_getdate.c \ archive_hmac.c \ archive_match.c \ archive_options.c \ archive_pack_dev.c \ archive_pathmatch.c \ archive_ppmd7.c \ archive_ppmd8.c \ archive_random.c \ archive_rb.c \ archive_read.c \ archive_read_add_passphrase.c \ archive_read_append_filter.c \ archive_read_data_into_fd.c \ archive_read_disk_entry_from_file.c \ archive_read_disk_posix.c \ archive_read_disk_set_standard_lookup.c \ archive_read_extract.c \ archive_read_extract2.c \ archive_read_open_fd.c \ archive_read_open_file.c \ archive_read_open_filename.c \ archive_read_open_memory.c \ archive_read_set_format.c \ archive_read_set_options.c \ archive_read_support_filter_all.c \ archive_read_support_filter_bzip2.c \ archive_read_support_filter_compress.c \ archive_read_support_filter_gzip.c \ archive_read_support_filter_grzip.c \ archive_read_support_filter_lrzip.c \ archive_read_support_filter_lz4.c \ archive_read_support_filter_lzop.c \ archive_read_support_filter_none.c \ archive_read_support_filter_program.c \ archive_read_support_filter_rpm.c \ archive_read_support_filter_uu.c \ archive_read_support_filter_xz.c \ archive_read_support_filter_zstd.c \ archive_read_support_format_7zip.c \ archive_read_support_format_all.c \ archive_read_support_format_ar.c \ archive_read_support_format_by_code.c \ archive_read_support_format_cab.c \ archive_read_support_format_cpio.c \ archive_read_support_format_empty.c \ archive_read_support_format_iso9660.c \ archive_read_support_format_lha.c \ archive_read_support_format_mtree.c \ archive_read_support_format_rar.c \ archive_read_support_format_rar5.c \ archive_read_support_format_raw.c \ archive_read_support_format_tar.c \ archive_read_support_format_warc.c \ archive_read_support_format_xar.c \ archive_read_support_format_zip.c \ archive_string.c \ archive_string_sprintf.c \ archive_util.c \ archive_version_details.c \ archive_virtual.c \ archive_write.c \ archive_write_add_filter.c \ archive_write_disk_set_standard_lookup.c \ archive_write_disk_posix.c \ archive_write_open_fd.c \ archive_write_open_file.c \ archive_write_open_filename.c \ archive_write_open_memory.c \ archive_write_add_filter_b64encode.c \ archive_write_add_filter_by_name.c \ archive_write_add_filter_bzip2.c \ archive_write_add_filter_compress.c \ archive_write_add_filter_grzip.c \ archive_write_add_filter_gzip.c \ archive_write_add_filter_lrzip.c \ archive_write_add_filter_lz4.c \ archive_write_add_filter_lzop.c \ archive_write_add_filter_none.c \ archive_write_add_filter_program.c \ archive_write_add_filter_uuencode.c \ archive_write_add_filter_xz.c \ archive_write_add_filter_zstd.c \ archive_write_set_format.c \ archive_write_set_format_7zip.c \ archive_write_set_format_ar.c \ archive_write_set_format_by_name.c \ archive_write_set_format_cpio.c \ archive_write_set_format_cpio_newc.c \ archive_write_set_format_filter_by_ext.c \ archive_write_set_format_gnutar.c \ archive_write_set_format_iso9660.c \ archive_write_set_format_mtree.c \ archive_write_set_format_pax.c \ archive_write_set_format_raw.c \ archive_write_set_format_shar.c \ archive_write_set_format_ustar.c \ archive_write_set_format_v7tar.c \ archive_write_set_format_warc.c \ 
archive_write_set_format_xar.c \ archive_write_set_format_zip.c \ archive_write_set_passphrase.c \ archive_write_set_options.c \ filter_fork_posix.c # Man pages to be installed. MAN= archive_entry.3 \ archive_entry_acl.3 \ archive_entry_linkify.3 \ archive_entry_misc.3 \ archive_entry_paths.3 \ archive_entry_perms.3 \ archive_entry_stat.3 \ archive_entry_time.3 \ archive_read.3 \ archive_read_data.3 \ archive_read_disk.3 \ archive_read_extract.3 \ archive_read_filter.3 \ archive_read_format.3 \ archive_read_free.3 \ archive_read_header.3 \ archive_read_new.3 \ archive_read_open.3 \ archive_read_set_options.3 \ archive_util.3 \ archive_write.3 \ archive_write_blocksize.3 \ archive_write_data.3 \ archive_write_disk.3 \ archive_write_filter.3 \ archive_write_finish_entry.3 \ archive_write_format.3 \ archive_write_free.3 \ archive_write_header.3 \ archive_write_new.3 \ archive_write_open.3 \ archive_write_set_options.3 \ cpio.5 \ libarchive.3 \ libarchive_changes.3 \ libarchive_internals.3 \ libarchive-formats.5 \ tar.5 # Symlink the man pages under each function name. MLINKS+= archive_entry.3 archive_entry_clear.3 MLINKS+= archive_entry.3 archive_entry_clone.3 MLINKS+= archive_entry.3 archive_entry_free.3 MLINKS+= archive_entry.3 archive_entry_new.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_add_entry.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_add_entry_w.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_clear.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_count.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_next.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_next_w.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_reset.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_text_w.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver_new.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver_set_strategy.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver_free.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_hardlink.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_hardlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_link.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_link_w.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_pathname.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_pathname_w.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_sourcepath.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_symlink.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_symlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_hardlink.3 MLINKS+= archive_entry_paths.3 archive_entry_hardlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_pathname.3 MLINKS+= archive_entry_paths.3 archive_entry_pathname_w.3 MLINKS+= archive_entry_paths.3 archive_entry_set_hardlink.3 MLINKS+= archive_entry_paths.3 archive_entry_set_link.3 MLINKS+= archive_entry_paths.3 archive_entry_set_pathname.3 MLINKS+= archive_entry_paths.3 archive_entry_set_symlink.3 MLINKS+= archive_entry_paths.3 archive_entry_symlink.3 MLINKS+= archive_entry_paths.3 archive_entry_symlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_update_symlink_utf8.3 MLINKS+= archive_entry_paths.3 archive_entry_update_hardlink_utf8.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_fflags_text.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_fflags_text_w.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_gname.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_gname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_uname.3 
MLINKS+= archive_entry_perms.3 archive_entry_copy_uname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_fflags.3 MLINKS+= archive_entry_perms.3 archive_entry_fflags_text.3 MLINKS+= archive_entry_perms.3 archive_entry_gid.3 MLINKS+= archive_entry_perms.3 archive_entry_gname.3 MLINKS+= archive_entry_perms.3 archive_entry_gname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_set_fflags.3 MLINKS+= archive_entry_perms.3 archive_entry_set_gid.3 MLINKS+= archive_entry_perms.3 archive_entry_set_gname.3 MLINKS+= archive_entry_perms.3 archive_entry_perm.3 MLINKS+= archive_entry_perms.3 archive_entry_set_perm.3 MLINKS+= archive_entry_perms.3 archive_entry_set_uid.3 MLINKS+= archive_entry_perms.3 archive_entry_set_uname.3 MLINKS+= archive_entry_perms.3 archive_entry_strmode.3 MLINKS+= archive_entry_perms.3 archive_entry_uid.3 MLINKS+= archive_entry_perms.3 archive_entry_uname.3 MLINKS+= archive_entry_perms.3 archive_entry_uname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_update_gname_utf8.3 MLINKS+= archive_entry_perms.3 archive_entry_update_uname_utf8.3 MLINKS+= archive_entry_stat.3 archive_entry_copy_stat.3 MLINKS+= archive_entry_stat.3 archive_entry_dev.3 MLINKS+= archive_entry_stat.3 archive_entry_dev_is_set.3 MLINKS+= archive_entry_stat.3 archive_entry_devmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_devminor.3 MLINKS+= archive_entry_stat.3 archive_entry_filetype.3 MLINKS+= archive_entry_stat.3 archive_entry_ino.3 MLINKS+= archive_entry_stat.3 archive_entry_ino64.3 MLINKS+= archive_entry_stat.3 archive_entry_ino_is_set.3 MLINKS+= archive_entry_stat.3 archive_entry_mode.3 MLINKS+= archive_entry_stat.3 archive_entry_nlink.3 MLINKS+= archive_entry_stat.3 archive_entry_rdev.3 MLINKS+= archive_entry_stat.3 archive_entry_rdevmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_rdevminor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_dev.3 MLINKS+= archive_entry_stat.3 archive_entry_set_devmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_devminor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_filetype.3 MLINKS+= archive_entry_stat.3 archive_entry_set_ino.3 MLINKS+= archive_entry_stat.3 archive_entry_set_ino64.3 MLINKS+= archive_entry_stat.3 archive_entry_set_mode.3 MLINKS+= archive_entry_stat.3 archive_entry_set_nlink.3 MLINKS+= archive_entry_stat.3 archive_entry_set_rdev.3 MLINKS+= archive_entry_stat.3 archive_entry_set_rdevmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_rdevminor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_size.3 MLINKS+= archive_entry_stat.3 archive_entry_size.3 MLINKS+= archive_entry_stat.3 archive_entry_size_is_set.3 MLINKS+= archive_entry_stat.3 archive_entry_unset_size.3 MLINKS+= archive_entry_time.3 archive_entry_atime.3 MLINKS+= archive_entry_time.3 archive_entry_atime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_atime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_birthtime.3 MLINKS+= archive_entry_time.3 archive_entry_birthtime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_birthtime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_ctime.3 MLINKS+= archive_entry_time.3 archive_entry_ctime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_ctime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_mtime.3 MLINKS+= archive_entry_time.3 archive_entry_mtime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_mtime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_set_atime.3 MLINKS+= archive_entry_time.3 archive_entry_set_birthtime.3 MLINKS+= archive_entry_time.3 archive_entry_set_ctime.3 MLINKS+= archive_entry_time.3 
archive_entry_set_mtime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_atime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_birthtime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_ctime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_mtime.3 MLINKS+= archive_read_data.3 archive_read_data_block.3 MLINKS+= archive_read_data.3 archive_read_data_into_fd.3 MLINKS+= archive_read_data.3 archive_read_data_skip.3 MLINKS+= archive_read_header.3 archive_read_next_header.3 MLINKS+= archive_read_header.3 archive_read_next_header2.3 MLINKS+= archive_read_extract.3 archive_read_extract2.3 MLINKS+= archive_read_extract.3 archive_read_extract_set_progress_callback.3 MLINKS+= archive_read_extract.3 archive_read_extract_set_skip_file.3 MLINKS+= archive_read_open.3 archive_read_open2.3 MLINKS+= archive_read_open.3 archive_read_open_FILE.3 MLINKS+= archive_read_open.3 archive_read_open_fd.3 MLINKS+= archive_read_open.3 archive_read_open_file.3 MLINKS+= archive_read_open.3 archive_read_open_filename.3 MLINKS+= archive_read_open.3 archive_read_open_memory.3 MLINKS+= archive_read_free.3 archive_read_close.3 MLINKS+= archive_read_free.3 archive_read_finish.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_all.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_bzip2.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_compress.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_gzip.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_lzma.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_none.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_xz.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_program.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_program_signature.3 MLINKS+= archive_read_format.3 archive_read_support_format_7zip.3 MLINKS+= archive_read_format.3 archive_read_support_format_all.3 MLINKS+= archive_read_format.3 archive_read_support_format_ar.3 MLINKS+= archive_read_format.3 archive_read_support_format_by_code.3 MLINKS+= archive_read_format.3 archive_read_support_format_cab.3 MLINKS+= archive_read_format.3 archive_read_support_format_cpio.3 MLINKS+= archive_read_format.3 archive_read_support_format_empty.3 MLINKS+= archive_read_format.3 archive_read_support_format_iso9660.3 MLINKS+= archive_read_format.3 archive_read_support_format_lha.3 MLINKS+= archive_read_format.3 archive_read_support_format_mtree.3 MLINKS+= archive_read_format.3 archive_read_support_format_rar.3 MLINKS+= archive_read_format.3 archive_read_support_format_raw.3 MLINKS+= archive_read_format.3 archive_read_support_format_tar.3 MLINKS+= archive_read_format.3 archive_read_support_format_xar.3 MLINKS+= archive_read_format.3 archive_read_support_format_zip.3 MLINKS+= archive_read_disk.3 archive_read_disk_entry_from_file.3 MLINKS+= archive_read_disk.3 archive_read_disk_gname.3 MLINKS+= archive_read_disk.3 archive_read_disk_new.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_gname_lookup.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_standard_lookup.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_symlink_hybrid.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_symlink_logical.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_symlink_physical.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_uname_lookup.3 MLINKS+= archive_read_disk.3 archive_read_disk_uname.3 MLINKS+= archive_read_set_options.3 archive_read_set_filter_option.3 MLINKS+= archive_read_set_options.3 archive_read_set_format_option.3 MLINKS+= 
archive_read_set_options.3 archive_read_set_option.3 MLINKS+= archive_util.3 archive_clear_error.3 MLINKS+= archive_util.3 archive_compression.3 MLINKS+= archive_util.3 archive_compression_name.3 MLINKS+= archive_util.3 archive_copy_error.3 MLINKS+= archive_util.3 archive_errno.3 MLINKS+= archive_util.3 archive_error_string.3 MLINKS+= archive_util.3 archive_file_count.3 MLINKS+= archive_util.3 archive_filter_code.3 MLINKS+= archive_util.3 archive_filter_count.3 MLINKS+= archive_util.3 archive_filter_name.3 MLINKS+= archive_util.3 archive_format.3 MLINKS+= archive_util.3 archive_format_name.3 MLINKS+= archive_util.3 archive_position.3 MLINKS+= archive_util.3 archive_set_error.3 MLINKS+= archive_write_blocksize.3 archive_write_get_bytes_in_last_block.3 MLINKS+= archive_write_blocksize.3 archive_write_get_bytes_per_block.3 MLINKS+= archive_write_blocksize.3 archive_write_set_bytes_in_last_block.3 MLINKS+= archive_write_blocksize.3 archive_write_set_bytes_per_block.3 MLINKS+= archive_write_disk.3 archive_write_data_block.3 MLINKS+= archive_write_disk.3 archive_write_disk_new.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_group_lookup.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_options.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_skip_file.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_standard_lookup.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_user_lookup.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_bzip2.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_compress.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_gzip.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_lzip.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_lzma.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_none.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_program.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_xz.3 MLINKS+= archive_write_format.3 archive_write_set_format_cpio.3 MLINKS+= archive_write_format.3 archive_write_set_format_pax.3 MLINKS+= archive_write_format.3 archive_write_set_format_pax_restricted.3 MLINKS+= archive_write_format.3 archive_write_set_format_shar.3 MLINKS+= archive_write_format.3 archive_write_set_format_shar_dump.3 MLINKS+= archive_write_format.3 archive_write_set_format_ustar.3 MLINKS+= archive_write_free.3 archive_write_close.3 MLINKS+= archive_write_free.3 archive_write_fail.3 MLINKS+= archive_write_free.3 archive_write_finish.3 MLINKS+= archive_write_open.3 archive_write_open_FILE.3 MLINKS+= archive_write_open.3 archive_write_open_fd.3 MLINKS+= archive_write_open.3 archive_write_open_file.3 MLINKS+= archive_write_open.3 archive_write_open_filename.3 MLINKS+= archive_write_open.3 archive_write_open_memory.3 MLINKS+= archive_write_set_options.3 archive_write_set_filter_option.3 MLINKS+= archive_write_set_options.3 archive_write_set_format_option.3 MLINKS+= archive_write_set_options.3 archive_write_set_option.3 MLINKS+= libarchive.3 archive.3 HAS_TESTS= SUBDIR.${MK_TESTS}+= tests .include Index: projects/clang900-import/lib/libarchive/tests/Makefile =================================================================== --- projects/clang900-import/lib/libarchive/tests/Makefile (revision 352536) +++ projects/clang900-import/lib/libarchive/tests/Makefile (revision 352537) @@ -1,628 +1,631 @@ # $FreeBSD$ PACKAGE= tests _LIBARCHIVEDIR= ${SRCTOP}/contrib/libarchive ATF_TESTS_SH+= functional_test TEST_METADATA.functional_test+= timeout="600" BINDIR= ${TESTSDIR} PROGS+= 
libarchive_test CFLAGS+= -I${.CURDIR} -I${.CURDIR:H} -I${.OBJDIR} CFLAGS+= -I${_LIBARCHIVEDIR}/libarchive -I${_LIBARCHIVEDIR}/libarchive/test CFLAGS+= -I${_LIBARCHIVEDIR}/test_utils CFLAGS+= -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1 # Uncomment to link against dmalloc #LDADD+= -L/usr/local/lib -ldmalloc #CFLAGS+= -I/usr/local/include -DUSE_DMALLOC .PATH: ${_LIBARCHIVEDIR}/libarchive/test TESTS_SRCS= \ test_acl_nfs4.c \ test_acl_pax.c \ test_acl_platform_nfs4.c \ test_acl_platform_posix1e.c \ test_acl_posix1e.c \ test_acl_text.c \ test_archive_api_feature.c \ test_archive_clear_error.c \ test_archive_cmdline.c \ test_archive_digest.c \ test_archive_getdate.c \ test_archive_match_time.c \ test_archive_match_owner.c \ test_archive_match_path.c \ test_archive_pathmatch.c \ test_archive_read_add_passphrase.c \ test_archive_read_close_twice.c \ test_archive_read_close_twice_open_fd.c \ test_archive_read_close_twice_open_filename.c \ test_archive_read_multiple_data_objects.c \ test_archive_read_next_header_empty.c \ test_archive_read_next_header_raw.c \ test_archive_read_open2.c \ test_archive_read_set_filter_option.c \ test_archive_read_set_format_option.c \ test_archive_read_set_option.c \ test_archive_read_set_options.c \ test_archive_read_support.c \ test_archive_set_error.c \ test_archive_string.c \ test_archive_string_conversion.c \ test_archive_write_add_filter_by_name.c \ test_archive_write_set_filter_option.c \ test_archive_write_set_format_by_name.c \ test_archive_write_set_format_filter_by_ext.c \ test_archive_write_set_format_option.c \ test_archive_write_set_option.c \ test_archive_write_set_options.c \ test_archive_write_set_passphrase.c \ test_bad_fd.c \ test_compat_bzip2.c \ test_compat_cpio.c \ test_compat_gtar.c \ test_compat_gzip.c \ test_compat_lz4.c \ test_compat_lzip.c \ test_compat_lzma.c \ test_compat_lzop.c \ test_compat_mac.c \ test_compat_perl_archive_tar.c \ test_compat_plexus_archiver_tar.c \ test_compat_solaris_tar_acl.c \ test_compat_solaris_pax_sparse.c \ test_compat_star_acl.c \ test_compat_tar_hardlink.c \ test_compat_uudecode.c \ test_compat_uudecode_large.c \ test_compat_xz.c \ test_compat_zip.c \ test_compat_zstd.c \ test_empty_write.c \ test_entry.c \ test_entry_strmode.c \ test_extattr_freebsd.c \ test_filter_count.c \ test_fuzz.c \ test_gnutar_filename_encoding.c \ test_link_resolver.c \ test_open_fd.c \ test_open_failure.c \ test_open_file.c \ test_open_filename.c \ test_pax_filename_encoding.c \ test_read_data_large.c \ test_read_disk.c \ test_read_disk_directory_traversals.c \ test_read_disk_entry_from_file.c \ test_read_extract.c \ test_read_file_nonexistent.c \ test_read_filter_compress.c \ test_read_filter_grzip.c \ test_read_filter_lrzip.c \ test_read_filter_lzop.c \ test_read_filter_lzop_multiple_parts.c \ test_read_filter_program.c \ test_read_filter_program_signature.c \ test_read_filter_uudecode.c \ test_read_format_7zip.c \ test_read_format_7zip_encryption_data.c \ test_read_format_7zip_encryption_header.c \ test_read_format_7zip_encryption_partially.c \ test_read_format_7zip_malformed.c \ test_read_format_ar.c \ test_read_format_cab.c \ test_read_format_cab_filename.c \ test_read_format_cpio_afio.c \ test_read_format_cpio_bin.c \ test_read_format_cpio_bin_Z.c \ test_read_format_cpio_bin_be.c \ test_read_format_cpio_bin_bz2.c \ test_read_format_cpio_bin_gz.c \ test_read_format_cpio_bin_le.c \ test_read_format_cpio_bin_lzip.c \ test_read_format_cpio_bin_lzma.c \ test_read_format_cpio_bin_xz.c \ test_read_format_cpio_filename.c \ 
test_read_format_cpio_odc.c \ test_read_format_cpio_svr4_gzip.c \ test_read_format_cpio_svr4c_Z.c \ test_read_format_cpio_svr4_bzip2_rpm.c \ test_read_format_cpio_svr4_gzip_rpm.c \ test_read_format_empty.c \ test_read_format_gtar_filename.c \ test_read_format_gtar_gz.c \ test_read_format_gtar_lzma.c \ test_read_format_gtar_sparse.c \ test_read_format_gtar_sparse_skip_entry.c \ test_read_format_iso_Z.c \ test_read_format_iso_multi_extent.c \ test_read_format_iso_xorriso.c \ test_read_format_isorr_rr_moved.c \ test_read_format_isojoliet_bz2.c \ test_read_format_isojoliet_long.c \ test_read_format_isojoliet_rr.c \ test_read_format_isojoliet_versioned.c \ test_read_format_isorr_bz2.c \ test_read_format_isorr_ce.c \ test_read_format_isorr_new_bz2.c \ test_read_format_isozisofs_bz2.c \ test_read_format_lha.c \ test_read_format_lha_bugfix_0.c \ test_read_format_lha_filename.c \ test_read_format_mtree.c \ test_read_format_mtree_crash747.c \ test_read_format_pax_bz2.c \ test_read_format_rar.c \ test_read_format_rar5.c \ test_read_format_rar_encryption_data.c \ test_read_format_rar_encryption_header.c \ test_read_format_rar_encryption_partially.c \ test_read_format_rar_invalid1.c \ test_read_format_raw.c \ test_read_format_tar.c \ test_read_format_tar_concatenated.c \ test_read_format_tar_empty_filename.c \ test_read_format_tar_empty_pax.c \ test_read_format_tar_empty_with_gnulabel.c \ test_read_format_tar_filename.c \ test_read_format_tbz.c \ test_read_format_tgz.c \ test_read_format_tlz.c \ test_read_format_txz.c \ test_read_format_tz.c \ test_read_format_ustar_filename.c \ test_read_format_warc.c \ test_read_format_xar.c \ test_read_format_zip.c \ test_read_format_zip_7075_utf8_paths.c \ test_read_format_zip_comment_stored.c \ test_read_format_zip_encryption_data.c \ test_read_format_zip_encryption_header.c \ test_read_format_zip_encryption_partially.c \ test_read_format_zip_extra_padding.c \ test_read_format_zip_filename.c \ test_read_format_zip_high_compression.c \ test_read_format_zip_jar.c \ test_read_format_zip_mac_metadata.c \ test_read_format_zip_malformed.c \ test_read_format_zip_msdos.c \ test_read_format_zip_nested.c \ test_read_format_zip_nofiletype.c \ test_read_format_zip_padded.c \ test_read_format_zip_sfx.c \ test_read_format_zip_traditional_encryption_data.c \ test_read_format_zip_winzip_aes.c \ test_read_format_zip_winzip_aes_large.c \ test_read_format_zip_with_invalid_traditional_eocd.c \ test_read_format_zip_zip64.c \ test_read_large.c \ test_read_pax_schily_xattr.c \ test_read_pax_truncated.c \ test_read_position.c \ test_read_set_format.c \ test_read_too_many_filters.c \ test_read_truncated.c \ test_read_truncated_filter.c \ test_sparse_basic.c \ test_tar_filenames.c \ test_tar_large.c \ test_warn_missing_hardlink_target.c \ test_ustar_filenames.c \ test_ustar_filename_encoding.c \ test_write_disk.c \ test_write_disk_appledouble.c \ test_write_disk_failures.c \ test_write_disk_hardlink.c \ test_write_disk_hfs_compression.c \ test_write_disk_lookup.c \ test_write_disk_mac_metadata.c \ test_write_disk_no_hfs_compression.c \ test_write_disk_perms.c \ test_write_disk_secure.c \ test_write_disk_secure744.c \ test_write_disk_secure745.c \ test_write_disk_secure746.c \ test_write_disk_sparse.c \ test_write_disk_symlink.c \ test_write_disk_times.c \ test_write_filter_b64encode.c \ test_write_filter_bzip2.c \ test_write_filter_compress.c \ test_write_filter_gzip.c \ test_write_filter_gzip_timestamp.c \ test_write_filter_lrzip.c \ test_write_filter_lz4.c \ test_write_filter_lzip.c \ 
test_write_filter_lzma.c \ test_write_filter_lzop.c \ test_write_filter_program.c \ test_write_filter_uuencode.c \ test_write_filter_xz.c \ test_write_filter_zstd.c \ test_write_format_7zip.c \ test_write_format_7zip_empty.c \ test_write_format_7zip_large.c \ test_write_format_ar.c \ test_write_format_cpio.c \ test_write_format_cpio_empty.c \ test_write_format_cpio_newc.c \ test_write_format_cpio_odc.c \ test_write_format_gnutar.c \ test_write_format_gnutar_filenames.c \ test_write_format_iso9660.c \ test_write_format_iso9660_boot.c \ test_write_format_iso9660_empty.c \ test_write_format_iso9660_filename.c \ test_write_format_iso9660_zisofs.c \ test_write_format_mtree.c \ test_write_format_mtree_absolute_path.c \ test_write_format_mtree_classic.c \ test_write_format_mtree_classic_indent.c \ test_write_format_mtree_fflags.c \ test_write_format_mtree_no_separator.c \ test_write_format_mtree_quoted_filename.c \ test_write_format_pax.c \ test_write_format_raw.c \ test_write_format_raw_b64.c \ test_write_format_shar_empty.c \ test_write_format_tar.c \ test_write_format_tar_empty.c \ test_write_format_tar_sparse.c \ test_write_format_tar_ustar.c \ test_write_format_tar_v7tar.c \ test_write_format_warc.c \ test_write_format_warc_empty.c \ test_write_format_xar.c \ test_write_format_xar_empty.c \ test_write_format_zip.c \ test_write_format_zip_compression_store.c \ test_write_format_zip_empty.c \ test_write_format_zip_empty_zip64.c \ test_write_format_zip_file.c \ test_write_format_zip_file_zip64.c \ test_write_format_zip_large.c \ test_write_format_zip_zip64.c \ test_write_open_memory.c \ test_write_read_format_zip.c \ test_xattr_platform.c \ test_zip_filename_encoding.c # Deterministic failures: # Crashes with SIGBUS BROKEN_TESTS+= test_archive_rmd160 # Fails with `libarchive/test/test_archive_crypto.c:121: md != actualmd` BROKEN_TESTS+= test_archive_sha384 # Fails with `test_read_disk_directory_traversals.c:1094: File at has atime 886622, 1443306049 seconds ago` BROKEN_TESTS+= test_read_disk_directory_traversals # Non-deterministic failures: # (Times out?) [and] crashes BROKEN_TESTS+= test_fuzz_rar +# https://bugs.freebsd.org/240683 +BROKEN_TESTS+= test_write_filter_zstd + # Build the test program. 
SRCS.libarchive_test= \ ${TESTS_SRCS} \ read_open_memory.c \ list.h LIBADD.libarchive_test= archive .PATH: ${_LIBARCHIVEDIR}/test_utils SRCS.libarchive_test+= test_main.c \ test_utils.c # list.h is just a list of all tests, as indicated by DEFINE_TEST macro lines list.h: ${TESTS_SRCS} Makefile @(cd ${_LIBARCHIVEDIR}/libarchive/test && \ grep -E -h ^DEFINE_TEST ${.ALLSRC:N*Makefile} | \ egrep -v '${BROKEN_TESTS:tW:C/ /|/g}') > ${.TARGET}.tmp @mv ${.TARGET}.tmp ${.TARGET} CLEANTESTS+= list.h list.h.tmp ${PACKAGE}FILES+= README ${PACKAGE}FILES+= test_acl_pax_posix1e.tar.uu ${PACKAGE}FILES+= test_acl_pax_nfs4.tar.uu ${PACKAGE}FILES+= test_archive_string_conversion.txt.Z.uu ${PACKAGE}FILES+= test_compat_bzip2_1.tbz.uu ${PACKAGE}FILES+= test_compat_bzip2_2.tbz.uu ${PACKAGE}FILES+= test_compat_cpio_1.cpio.uu ${PACKAGE}FILES+= test_compat_gtar_1.tar.uu ${PACKAGE}FILES+= test_compat_gtar_2.tar.uu ${PACKAGE}FILES+= test_compat_gzip_1.tgz.uu ${PACKAGE}FILES+= test_compat_gzip_2.tgz.uu ${PACKAGE}FILES+= test_compat_lz4_1.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_2.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_3.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B4.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B4BD.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B4BDBX.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B5.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B5BD.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B6.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B6BD.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B7.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lz4_B7BD.tar.lz4.uu ${PACKAGE}FILES+= test_compat_lzip_1.tlz.uu ${PACKAGE}FILES+= test_compat_lzip_2.tlz.uu ${PACKAGE}FILES+= test_compat_lzma_1.tlz.uu ${PACKAGE}FILES+= test_compat_lzma_2.tlz.uu ${PACKAGE}FILES+= test_compat_lzma_3.tlz.uu ${PACKAGE}FILES+= test_compat_lzop_1.tar.lzo.uu ${PACKAGE}FILES+= test_compat_lzop_2.tar.lzo.uu ${PACKAGE}FILES+= test_compat_lzop_3.tar.lzo.uu ${PACKAGE}FILES+= test_compat_mac-1.tar.Z.uu ${PACKAGE}FILES+= test_compat_mac-2.tar.Z.uu ${PACKAGE}FILES+= test_compat_perl_archive_tar.tar.uu ${PACKAGE}FILES+= test_compat_plexus_archiver_tar.tar.uu ${PACKAGE}FILES+= test_compat_solaris_pax_sparse_1.pax.Z.uu ${PACKAGE}FILES+= test_compat_solaris_pax_sparse_2.pax.Z.uu ${PACKAGE}FILES+= test_compat_solaris_tar_acl.tar.uu ${PACKAGE}FILES+= test_compat_star_acl_nfs4.tar.uu ${PACKAGE}FILES+= test_compat_star_acl_posix1e.tar.uu ${PACKAGE}FILES+= test_compat_tar_hardlink_1.tar.uu ${PACKAGE}FILES+= test_compat_uudecode_large.tar.Z.uu ${PACKAGE}FILES+= test_compat_xz_1.txz.uu ${PACKAGE}FILES+= test_compat_zip_1.zip.uu ${PACKAGE}FILES+= test_compat_zip_2.zip.uu ${PACKAGE}FILES+= test_compat_zip_3.zip.uu ${PACKAGE}FILES+= test_compat_zip_4.zip.uu ${PACKAGE}FILES+= test_compat_zip_5.zip.uu ${PACKAGE}FILES+= test_compat_zip_6.zip.uu ${PACKAGE}FILES+= test_compat_zip_7.xps.uu ${PACKAGE}FILES+= test_compat_zip_8.zip.uu ${PACKAGE}FILES+= test_compat_zstd_1.tar.zst.uu ${PACKAGE}FILES+= test_fuzz.cab.uu ${PACKAGE}FILES+= test_fuzz.lzh.uu ${PACKAGE}FILES+= test_fuzz_1.iso.Z.uu ${PACKAGE}FILES+= test_pax_filename_encoding.tar.uu ${PACKAGE}FILES+= test_rar_multivolume_multiple_files.part1.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_multiple_files.part2.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_multiple_files.part3.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_multiple_files.part4.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_multiple_files.part5.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_multiple_files.part6.rar.uu ${PACKAGE}FILES+= 
test_rar_multivolume_single_file.part1.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_single_file.part2.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_single_file.part3.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part01.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part02.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part03.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part04.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part05.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part06.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part07.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part08.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part09.rar.uu ${PACKAGE}FILES+= test_rar_multivolume_uncompressed_files.part10.rar.uu ${PACKAGE}FILES+= test_read_filter_grzip.tar.grz.uu ${PACKAGE}FILES+= test_read_filter_lrzip.tar.lrz.uu ${PACKAGE}FILES+= test_read_filter_lzop.tar.lzo.uu ${PACKAGE}FILES+= test_read_filter_lzop_multiple_parts.tar.lzo.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_bzip2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_copy_1.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_copy_2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_copy_lzma.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_deflate.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_lzma1_1.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_lzma1_2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_lzma2_1.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj2_lzma2_2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj_bzip2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj_copy.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj_deflate.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj_lzma1.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bcj_lzma2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_bzip2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_copy.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_copy_2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_deflate.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_delta_lzma1.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_delta_lzma2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_empty_archive.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_empty_file.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_encryption.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_encryption_header.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_encryption_partially.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_lzma1.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_lzma1_2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_lzma1_lzma2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_lzma2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_malformed.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_malformed2.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_ppmd.7z.uu ${PACKAGE}FILES+= test_read_format_7zip_symbolic_name.7z.uu ${PACKAGE}FILES+= test_read_format_ar.ar.uu ${PACKAGE}FILES+= test_read_format_cab_1.cab.uu ${PACKAGE}FILES+= test_read_format_cab_2.cab.uu ${PACKAGE}FILES+= test_read_format_cab_3.cab.uu ${PACKAGE}FILES+= test_read_format_cab_filename_cp932.cab.uu ${PACKAGE}FILES+= test_read_format_cpio_bin_be.cpio.uu ${PACKAGE}FILES+= test_read_format_cpio_bin_le.cpio.uu ${PACKAGE}FILES+= test_read_format_cpio_filename_cp866.cpio.uu ${PACKAGE}FILES+= test_read_format_cpio_filename_eucjp.cpio.uu ${PACKAGE}FILES+= test_read_format_cpio_filename_koi8r.cpio.uu 
${PACKAGE}FILES+= test_read_format_cpio_filename_utf8_jp.cpio.uu ${PACKAGE}FILES+= test_read_format_cpio_filename_utf8_ru.cpio.uu ${PACKAGE}FILES+= test_read_format_cpio_svr4_bzip2_rpm.rpm.uu ${PACKAGE}FILES+= test_read_format_cpio_svr4_gzip_rpm.rpm.uu ${PACKAGE}FILES+= test_read_format_gtar_filename_cp866.tar.Z.uu ${PACKAGE}FILES+= test_read_format_gtar_filename_eucjp.tar.Z.uu ${PACKAGE}FILES+= test_read_format_gtar_filename_koi8r.tar.Z.uu ${PACKAGE}FILES+= test_read_format_gtar_sparse_1_13.tar.uu ${PACKAGE}FILES+= test_read_format_gtar_sparse_1_17.tar.uu ${PACKAGE}FILES+= test_read_format_gtar_sparse_1_17_posix00.tar.uu ${PACKAGE}FILES+= test_read_format_gtar_sparse_1_17_posix01.tar.uu ${PACKAGE}FILES+= test_read_format_gtar_sparse_1_17_posix10.tar.uu ${PACKAGE}FILES+= test_read_format_gtar_sparse_1_17_posix10_modified.tar.uu ${PACKAGE}FILES+= test_read_format_gtar_sparse_skip_entry.tar.Z.uu ${PACKAGE}FILES+= test_read_format_iso.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_2.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_joliet.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_joliet_by_nero.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_joliet_long.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_joliet_rockridge.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_multi_extent.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_rockridge.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_rockridge_ce.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_rockridge_new.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_rockridge_rr_moved.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_xorriso.iso.Z.uu ${PACKAGE}FILES+= test_read_format_iso_zisofs.iso.Z.uu ${PACKAGE}FILES+= test_read_format_lha_bugfix_0.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_filename_cp932.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_header0.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_header1.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_header2.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_header3.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_lh0.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_lh6.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_lh7.lzh.uu ${PACKAGE}FILES+= test_read_format_lha_withjunk.lzh.uu ${PACKAGE}FILES+= test_read_format_mtree.mtree.uu ${PACKAGE}FILES+= test_read_format_mtree_crash747.mtree.bz2.uu ${PACKAGE}FILES+= test_read_format_mtree_nomagic.mtree.uu ${PACKAGE}FILES+= test_read_format_mtree_nomagic2.mtree.uu ${PACKAGE}FILES+= test_read_format_mtree_nomagic3.mtree.uu ${PACKAGE}FILES+= test_read_format_mtree_noprint.mtree.uu ${PACKAGE}FILES+= test_read_format_rar.rar.uu ${PACKAGE}FILES+= test_read_format_rar_binary_data.rar.uu ${PACKAGE}FILES+= test_read_format_rar_compress_best.rar.uu ${PACKAGE}FILES+= test_read_format_rar_compress_normal.rar.uu ${PACKAGE}FILES+= test_read_format_rar_encryption_data.rar.uu ${PACKAGE}FILES+= test_read_format_rar_encryption_header.rar.uu ${PACKAGE}FILES+= test_read_format_rar_encryption_partially.rar.uu ${PACKAGE}FILES+= test_read_format_rar_invalid1.rar.uu ${PACKAGE}FILES+= test_read_format_rar_multi_lzss_blocks.rar.uu ${PACKAGE}FILES+= test_read_format_rar_multivolume.part0001.rar.uu ${PACKAGE}FILES+= test_read_format_rar_multivolume.part0002.rar.uu ${PACKAGE}FILES+= test_read_format_rar_multivolume.part0003.rar.uu ${PACKAGE}FILES+= test_read_format_rar_multivolume.part0004.rar.uu ${PACKAGE}FILES+= test_read_format_rar_noeof.rar.uu ${PACKAGE}FILES+= test_read_format_rar_ppmd_lzss_conversion.rar.uu ${PACKAGE}FILES+= test_read_format_rar_ppmd_use_after_free.rar.uu 
${PACKAGE}FILES+= test_read_format_rar_ppmd_use_after_free2.rar.uu ${PACKAGE}FILES+= test_read_format_rar_sfx.exe.uu ${PACKAGE}FILES+= test_read_format_rar_subblock.rar.uu ${PACKAGE}FILES+= test_read_format_rar_unicode.rar.uu ${PACKAGE}FILES+= test_read_format_rar_windows.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_arm.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_arm_filter_on_window_boundary.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_blake2.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_compressed.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_different_window_size.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_distance_overflow.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_extra_field_version.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_extra_field_version.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_fileattr.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_hardlink.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_invalid_dict_reference.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_leftshift1.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_leftshift2.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_hardlink.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_invalid_dict_reference.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_leftshift1.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_leftshift2.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part01.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part02.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part03.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part04.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part05.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part06.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part07.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part08.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive_solid.part01.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive_solid.part02.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive_solid.part03.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive_solid.part04.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiple_files.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiple_files_solid.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_nonempty_dir_stream.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_owner.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_readtables_overflow.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_owner.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_readtables_overflow.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_solid.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_stored.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_stored_manyfiles.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_symlink.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_truncated_huff.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_truncated_huff.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_win32.rar.uu ${PACKAGE}FILES+= test_read_format_raw.bufr.uu ${PACKAGE}FILES+= test_read_format_raw.data.Z.uu ${PACKAGE}FILES+= test_read_format_raw.data.gz.uu ${PACKAGE}FILES+= test_read_format_raw.data.uu ${PACKAGE}FILES+= test_read_format_tar_concatenated.tar.uu ${PACKAGE}FILES+= test_read_format_tar_empty_filename.tar.uu ${PACKAGE}FILES+= test_read_format_tar_empty_with_gnulabel.tar.uu ${PACKAGE}FILES+= test_read_format_tar_empty_pax.tar.Z.uu ${PACKAGE}FILES+= test_read_format_tar_filename_koi8r.tar.Z.uu ${PACKAGE}FILES+= 
test_read_format_ustar_filename_cp866.tar.Z.uu ${PACKAGE}FILES+= test_read_format_ustar_filename_eucjp.tar.Z.uu ${PACKAGE}FILES+= test_read_format_ustar_filename_koi8r.tar.Z.uu ${PACKAGE}FILES+= test_read_format_warc.warc.uu ${PACKAGE}FILES+= test_read_format_zip.zip.uu ${PACKAGE}FILES+= test_read_format_zip_7075_utf8_paths.zip.uu ${PACKAGE}FILES+= test_read_format_zip_bz2_hang.zip.uu ${PACKAGE}FILES+= test_read_format_zip_bzip2.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_bzip2_multi.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_comment_stored_1.zip.uu ${PACKAGE}FILES+= test_read_format_zip_comment_stored_2.zip.uu ${PACKAGE}FILES+= test_read_format_zip_encryption_data.zip.uu ${PACKAGE}FILES+= test_read_format_zip_encryption_header.zip.uu ${PACKAGE}FILES+= test_read_format_zip_encryption_partially.zip.uu ${PACKAGE}FILES+= test_read_format_zip_extra_padding.zip.uu ${PACKAGE}FILES+= test_read_format_zip_filename_cp866.zip.uu ${PACKAGE}FILES+= test_read_format_zip_filename_cp932.zip.uu ${PACKAGE}FILES+= test_read_format_zip_filename_koi8r.zip.uu ${PACKAGE}FILES+= test_read_format_zip_filename_utf8_jp.zip.uu ${PACKAGE}FILES+= test_read_format_zip_filename_utf8_ru.zip.uu ${PACKAGE}FILES+= test_read_format_zip_filename_utf8_ru2.zip.uu ${PACKAGE}FILES+= test_read_format_zip_high_compression.zip.uu ${PACKAGE}FILES+= test_read_format_zip_jar.jar.uu ${PACKAGE}FILES+= test_read_format_zip_length_at_end.zip.uu ${PACKAGE}FILES+= test_read_format_zip_lzma_alone_leak.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_lzma.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_lzma_multi.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_mac_metadata.zip.uu ${PACKAGE}FILES+= test_read_format_zip_malformed1.zip.uu ${PACKAGE}FILES+= test_read_format_zip_msdos.zip.uu ${PACKAGE}FILES+= test_read_format_zip_nested.zip.uu ${PACKAGE}FILES+= test_read_format_zip_nofiletype.zip.uu ${PACKAGE}FILES+= test_read_format_zip_padded1.zip.uu ${PACKAGE}FILES+= test_read_format_zip_padded2.zip.uu ${PACKAGE}FILES+= test_read_format_zip_padded3.zip.uu ${PACKAGE}FILES+= test_read_format_zip_ppmd8.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_ppmd8_crash_1.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_ppmd8_crash_2.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_ppmd8_multi.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_sfx.uu ${PACKAGE}FILES+= test_read_format_zip_symlink.zip.uu ${PACKAGE}FILES+= test_read_format_zip_traditional_encryption_data.zip.uu ${PACKAGE}FILES+= test_read_format_zip_ux.zip.uu ${PACKAGE}FILES+= test_read_format_zip_with_invalid_traditional_eocd.zip.uu ${PACKAGE}FILES+= test_read_format_zip_winzip_aes128.zip.uu ${PACKAGE}FILES+= test_read_format_zip_winzip_aes256.zip.uu ${PACKAGE}FILES+= test_read_format_zip_winzip_aes256_large.zip.uu ${PACKAGE}FILES+= test_read_format_zip_winzip_aes256_stored.zip.uu ${PACKAGE}FILES+= test_read_format_zip_xz_multi.zipx.uu ${PACKAGE}FILES+= test_read_format_zip_zip64a.zip.uu ${PACKAGE}FILES+= test_read_format_zip_zip64b.zip.uu ${PACKAGE}FILES+= test_read_large_splitted_rar_aa.uu ${PACKAGE}FILES+= test_read_large_splitted_rar_ab.uu ${PACKAGE}FILES+= test_read_large_splitted_rar_ac.uu ${PACKAGE}FILES+= test_read_large_splitted_rar_ad.uu ${PACKAGE}FILES+= test_read_large_splitted_rar_ae.uu ${PACKAGE}FILES+= test_read_pax_schily_xattr.tar.uu ${PACKAGE}FILES+= test_read_splitted_rar_aa.uu ${PACKAGE}FILES+= test_read_splitted_rar_ab.uu ${PACKAGE}FILES+= test_read_splitted_rar_ac.uu ${PACKAGE}FILES+= test_read_splitted_rar_ad.uu ${PACKAGE}FILES+= test_read_too_many_filters.gz.uu 
${PACKAGE}FILES+= test_splitted_rar_seek_support_aa.uu ${PACKAGE}FILES+= test_splitted_rar_seek_support_ab.uu ${PACKAGE}FILES+= test_splitted_rar_seek_support_ac.uu ${PACKAGE}FILES+= test_write_disk_appledouble.cpio.gz.uu ${PACKAGE}FILES+= test_write_disk_hfs_compression.tgz.uu ${PACKAGE}FILES+= test_write_disk_mac_metadata.tar.gz.uu ${PACKAGE}FILES+= test_write_disk_no_hfs_compression.tgz.uu .include Index: projects/clang900-import/lib/libbe/be.c =================================================================== --- projects/clang900-import/lib/libbe/be.c (revision 352536) +++ projects/clang900-import/lib/libbe/be.c (revision 352537) @@ -1,1097 +1,1098 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Kyle J. Kneitinger * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "be.h" #include "be_impl.h" struct be_destroy_data { libbe_handle_t *lbh; char *snapname; }; #if SOON static int be_create_child_noent(libbe_handle_t *lbh, const char *active, const char *child_path); static int be_create_child_cloned(libbe_handle_t *lbh, const char *active); #endif /* Arbitrary... should tune */ #define BE_SNAP_SERIAL_MAX 1024 /* * Iterator function for locating the rootfs amongst the children of the * zfs_be_root set by loader(8). data is expected to be a libbe_handle_t *. */ static int be_locate_rootfs(libbe_handle_t *lbh) { struct statfs sfs; struct extmnttab entry; zfs_handle_t *zfs; /* * Check first if root is ZFS; if not, we'll bail on rootfs capture. * Unfortunately needed because zfs_path_to_zhandle will emit to * stderr if / isn't actually a ZFS filesystem, which we'd like * to avoid. */ if (statfs("/", &sfs) == 0) { statfs2mnttab(&sfs, &entry); if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) return (1); } else return (1); zfs = zfs_path_to_zhandle(lbh->lzh, "/", ZFS_TYPE_FILESYSTEM); if (zfs == NULL) return (1); strlcpy(lbh->rootfs, zfs_get_name(zfs), sizeof(lbh->rootfs)); zfs_close(zfs); return (0); } /* * Initializes the libbe context to operate in the root boot environment * dataset, for example, zroot/ROOT. 
*/ libbe_handle_t * libbe_init(const char *root) { char altroot[MAXPATHLEN]; libbe_handle_t *lbh; char *poolname, *pos; int pnamelen; lbh = NULL; poolname = pos = NULL; if ((lbh = calloc(1, sizeof(libbe_handle_t))) == NULL) goto err; if ((lbh->lzh = libzfs_init()) == NULL) goto err; /* * Grab rootfs, we'll work backwards from there if an optional BE root * has not been passed in. */ if (be_locate_rootfs(lbh) != 0) { if (root == NULL) goto err; *lbh->rootfs = '\0'; } if (root == NULL) { /* Strip off the final slash from rootfs to get the be root */ strlcpy(lbh->root, lbh->rootfs, sizeof(lbh->root)); pos = strrchr(lbh->root, '/'); if (pos == NULL) goto err; *pos = '\0'; } else strlcpy(lbh->root, root, sizeof(lbh->root)); if ((pos = strchr(lbh->root, '/')) == NULL) goto err; pnamelen = pos - lbh->root; poolname = malloc(pnamelen + 1); if (poolname == NULL) goto err; strlcpy(poolname, lbh->root, pnamelen + 1); if ((lbh->active_phandle = zpool_open(lbh->lzh, poolname)) == NULL) goto err; free(poolname); poolname = NULL; if (zpool_get_prop(lbh->active_phandle, ZPOOL_PROP_BOOTFS, lbh->bootfs, sizeof(lbh->bootfs), NULL, true) != 0) goto err; if (zpool_get_prop(lbh->active_phandle, ZPOOL_PROP_ALTROOT, altroot, sizeof(altroot), NULL, true) == 0 && strcmp(altroot, "-") != 0) lbh->altroot_len = strlen(altroot); return (lbh); err: if (lbh != NULL) { if (lbh->active_phandle != NULL) zpool_close(lbh->active_phandle); if (lbh->lzh != NULL) libzfs_fini(lbh->lzh); free(lbh); } free(poolname); return (NULL); } /* * Free memory allocated by libbe_init() */ void libbe_close(libbe_handle_t *lbh) { if (lbh->active_phandle != NULL) zpool_close(lbh->active_phandle); libzfs_fini(lbh->lzh); free(lbh); } /* * Proxy through to libzfs for the moment. */ void be_nicenum(uint64_t num, char *buf, size_t buflen) { zfs_nicenum(num, buf, buflen); } static int be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) { char path[BE_MAXPATHLEN]; struct be_destroy_data *bdd; zfs_handle_t *snap; int err; bdd = (struct be_destroy_data *)data; if (bdd->snapname == NULL) { err = zfs_iter_children(zfs_hdl, be_destroy_cb, data); if (err != 0) return (err); return (zfs_destroy(zfs_hdl, false)); } /* If we're dealing with snapshots instead, delete that one alone */ err = zfs_iter_filesystems(zfs_hdl, be_destroy_cb, data); if (err != 0) return (err); /* * This part is intentionally glossing over any potential errors, * because there's a lot less potential for errors when we're cleaning * up snapshots rather than a full deep BE. The primary error case * here being if the snapshot doesn't exist in the first place, which * the caller will likely deem insignificant as long as it doesn't * exist after the call. Thus, such a missing snapshot shouldn't jam * up the destruction. */ snprintf(path, sizeof(path), "%s@%s", zfs_get_name(zfs_hdl), bdd->snapname); if (!zfs_dataset_exists(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) return (0); snap = zfs_open(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT); if (snap != NULL) zfs_destroy(snap, false); return (0); } /* * Destroy the boot environment or snapshot specified by the name * parameter. 
Options are or'd together with the possible values: * BE_DESTROY_FORCE : forces operation on mounted datasets * BE_DESTROY_ORIGIN: destroy the origin snapshot as well */ int be_destroy(libbe_handle_t *lbh, const char *name, int options) { struct be_destroy_data bdd; char origin[BE_MAXPATHLEN], path[BE_MAXPATHLEN]; zfs_handle_t *fs; char *snapdelim; int err, force, mounted; size_t rootlen; bdd.lbh = lbh; bdd.snapname = NULL; force = options & BE_DESTROY_FORCE; *origin = '\0'; be_root_concat(lbh, name, path); if ((snapdelim = strchr(path, '@')) == NULL) { if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_FILESYSTEM)) return (set_error(lbh, BE_ERR_NOENT)); if (strcmp(path, lbh->rootfs) == 0 || strcmp(path, lbh->bootfs) == 0) return (set_error(lbh, BE_ERR_DESTROYACT)); fs = zfs_open(lbh->lzh, path, ZFS_TYPE_FILESYSTEM); if (fs == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); if ((options & BE_DESTROY_ORIGIN) != 0 && zfs_prop_get(fs, ZFS_PROP_ORIGIN, origin, sizeof(origin), NULL, NULL, 0, 1) != 0) return (set_error(lbh, BE_ERR_NOORIGIN)); /* Don't destroy a mounted dataset unless force is specified */ if ((mounted = zfs_is_mounted(fs, NULL)) != 0) { if (force) { zfs_unmount(fs, NULL, 0); } else { free(bdd.snapname); return (set_error(lbh, BE_ERR_DESTROYMNT)); } } } else { if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) return (set_error(lbh, BE_ERR_NOENT)); bdd.snapname = strdup(snapdelim + 1); if (bdd.snapname == NULL) return (set_error(lbh, BE_ERR_NOMEM)); *snapdelim = '\0'; fs = zfs_open(lbh->lzh, path, ZFS_TYPE_DATASET); if (fs == NULL) { free(bdd.snapname); return (set_error(lbh, BE_ERR_ZFSOPEN)); } } err = be_destroy_cb(fs, &bdd); zfs_close(fs); free(bdd.snapname); if (err != 0) { /* Children are still present or the mount is referenced */ if (err == EBUSY) return (set_error(lbh, BE_ERR_DESTROYMNT)); return (set_error(lbh, BE_ERR_UNKNOWN)); } if ((options & BE_DESTROY_ORIGIN) == 0) return (0); /* The origin can't possibly be shorter than the BE root */ rootlen = strlen(lbh->root); if (*origin == '\0' || strlen(origin) <= rootlen + 1) return (set_error(lbh, BE_ERR_INVORIGIN)); /* * We'll be chopping off the BE root and running this back through * be_destroy, so that we properly handle the origin snapshot whether * it be that of a deep BE or not. */ if (strncmp(origin, lbh->root, rootlen) != 0 || origin[rootlen] != '/') return (0); return (be_destroy(lbh, origin + rootlen + 1, options & ~BE_DESTROY_ORIGIN)); } static void be_setup_snapshot_name(libbe_handle_t *lbh, char *buf, size_t buflen) { time_t rawtime; int len, serial; time(&rawtime); len = strlen(buf); len += strftime(buf + len, buflen - len, "@%F-%T", localtime(&rawtime)); /* No room for serial... 
caller will do its best */ if (buflen - len < 2) return; for (serial = 0; serial < BE_SNAP_SERIAL_MAX; ++serial) { snprintf(buf + len, buflen - len, "-%d", serial); if (!zfs_dataset_exists(lbh->lzh, buf, ZFS_TYPE_SNAPSHOT)) return; } } int be_snapshot(libbe_handle_t *lbh, const char *source, const char *snap_name, bool recursive, char *result) { char buf[BE_MAXPATHLEN]; int err; be_root_concat(lbh, source, buf); if ((err = be_exists(lbh, buf)) != 0) return (set_error(lbh, err)); if (snap_name != NULL) { if (strlcat(buf, "@", sizeof(buf)) >= sizeof(buf)) return (set_error(lbh, BE_ERR_INVALIDNAME)); if (strlcat(buf, snap_name, sizeof(buf)) >= sizeof(buf)) return (set_error(lbh, BE_ERR_INVALIDNAME)); if (result != NULL) snprintf(result, BE_MAXPATHLEN, "%s@%s", source, snap_name); } else { be_setup_snapshot_name(lbh, buf, sizeof(buf)); if (result != NULL && strlcpy(result, strrchr(buf, '/') + 1, sizeof(buf)) >= sizeof(buf)) return (set_error(lbh, BE_ERR_INVALIDNAME)); } if ((err = zfs_snapshot(lbh->lzh, buf, recursive, NULL)) != 0) { switch (err) { case EZFS_INVALIDNAME: return (set_error(lbh, BE_ERR_INVALIDNAME)); default: /* * The other errors that zfs_ioc_snapshot might return * shouldn't happen if we've set things up properly, so * we'll gloss over them and call it UNKNOWN as it will * require further triage. */ if (errno == ENOTSUP) return (set_error(lbh, BE_ERR_NOPOOL)); return (set_error(lbh, BE_ERR_UNKNOWN)); } } return (BE_ERR_SUCCESS); } /* * Create the boot environment specified by the name parameter */ int be_create(libbe_handle_t *lbh, const char *name) { int err; err = be_create_from_existing(lbh, name, be_active_path(lbh)); return (set_error(lbh, err)); } static int be_deep_clone_prop(int prop, void *cb) { int err; struct libbe_dccb *dccb; zprop_source_t src; char pval[BE_MAXPATHLEN]; char source[BE_MAXPATHLEN]; char *val; dccb = cb; /* Skip some properties we don't want to touch */ if (prop == ZFS_PROP_CANMOUNT) return (ZPROP_CONT); /* Don't copy readonly properties */ if (zfs_prop_readonly(prop)) return (ZPROP_CONT); if ((err = zfs_prop_get(dccb->zhp, prop, (char *)&pval, sizeof(pval), &src, (char *)&source, sizeof(source), false))) /* Just continue if we fail to read a property */ return (ZPROP_CONT); /* * Only copy locally defined or received properties. This continues * to avoid temporary/default/local properties intentionally without * breaking received datasets. */ if (src != ZPROP_SRC_LOCAL && src != ZPROP_SRC_RECEIVED) return (ZPROP_CONT); /* Augment mountpoint with altroot, if needed */ val = pval; if (prop == ZFS_PROP_MOUNTPOINT) val = be_mountpoint_augmented(dccb->lbh, val); nvlist_add_string(dccb->props, zfs_prop_to_name(prop), val); return (ZPROP_CONT); } /* * Return the corresponding boot environment path for a given * dataset path, the constructed path is placed in 'result'. * * example: say our new boot environment name is 'bootenv' and * the dataset path is 'zroot/ROOT/default/data/set'. * * result should produce: 'zroot/ROOT/bootenv/data/set' */ static int be_get_path(struct libbe_deep_clone *ldc, const char *dspath, char *result, int result_size) { char *pos; char *child_dataset; /* match the root path for the boot environments */ pos = strstr(dspath, ldc->lbh->root); /* no match, different pools? 
*/ if (pos == NULL) return (BE_ERR_BADPATH); /* root path of the new boot environment */ snprintf(result, result_size, "%s/%s", ldc->lbh->root, ldc->bename); /* gets us to the parent dataset, the +1 consumes a trailing slash */ pos += strlen(ldc->lbh->root) + 1; /* skip the parent dataset */ if ((child_dataset = strchr(pos, '/')) != NULL) strlcat(result, child_dataset, result_size); return (BE_ERR_SUCCESS); } static int be_clone_cb(zfs_handle_t *ds, void *data) { int err; char be_path[BE_MAXPATHLEN]; char snap_path[BE_MAXPATHLEN]; const char *dspath; zfs_handle_t *snap_hdl; nvlist_t *props; struct libbe_deep_clone *ldc; struct libbe_dccb dccb; ldc = (struct libbe_deep_clone *)data; dspath = zfs_get_name(ds); snprintf(snap_path, sizeof(snap_path), "%s@%s", dspath, ldc->snapname); /* construct the boot environment path from the dataset we're cloning */ if (be_get_path(ldc, dspath, be_path, sizeof(be_path)) != BE_ERR_SUCCESS) return (set_error(ldc->lbh, BE_ERR_UNKNOWN)); /* the dataset to be created (i.e. the boot environment) already exists */ if (zfs_dataset_exists(ldc->lbh->lzh, be_path, ZFS_TYPE_DATASET)) return (set_error(ldc->lbh, BE_ERR_EXISTS)); /* no snapshot found for this dataset, silently skip it */ if (!zfs_dataset_exists(ldc->lbh->lzh, snap_path, ZFS_TYPE_SNAPSHOT)) return (0); if ((snap_hdl = zfs_open(ldc->lbh->lzh, snap_path, ZFS_TYPE_SNAPSHOT)) == NULL) return (set_error(ldc->lbh, BE_ERR_ZFSOPEN)); nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "noauto"); dccb.lbh = ldc->lbh; dccb.zhp = ds; dccb.props = props; if (zprop_iter(be_deep_clone_prop, &dccb, B_FALSE, B_FALSE, ZFS_TYPE_FILESYSTEM) == ZPROP_INVAL) return (-1); if ((err = zfs_clone(snap_hdl, be_path, props)) != 0) return (set_error(ldc->lbh, BE_ERR_ZFSCLONE)); nvlist_free(props); zfs_close(snap_hdl); if (ldc->depth_limit == -1 || ldc->depth < ldc->depth_limit) { ldc->depth++; err = zfs_iter_filesystems(ds, be_clone_cb, ldc); ldc->depth--; } return (set_error(ldc->lbh, err)); } /* * Create a boot environment with a given name from a given snapshot. * Snapshots can be in the format 'zroot/ROOT/default@snapshot' or * 'default@snapshot'. In the latter case, 'default@snapshot' will be prepended * with the root path that libbe was initailized with. */ static int be_clone(libbe_handle_t *lbh, const char *bename, const char *snapshot, int depth) { int err; char snap_path[BE_MAXPATHLEN]; char *parentname, *snapname; zfs_handle_t *parent_hdl; struct libbe_deep_clone ldc; /* ensure the boot environment name is valid */ if ((err = be_validate_name(lbh, bename)) != 0) return (set_error(lbh, err)); /* * prepend the boot environment root path if we're * given a partial snapshot name. 
*/ if ((err = be_root_concat(lbh, snapshot, snap_path)) != 0) return (set_error(lbh, err)); /* ensure the snapshot exists */ if ((err = be_validate_snap(lbh, snap_path)) != 0) return (set_error(lbh, err)); /* get a copy of the snapshot path so we can disect it */ if ((parentname = strdup(snap_path)) == NULL) return (set_error(lbh, BE_ERR_UNKNOWN)); /* split dataset name from snapshot name */ snapname = strchr(parentname, '@'); if (snapname == NULL) { free(parentname); return (set_error(lbh, BE_ERR_UNKNOWN)); } *snapname = '\0'; snapname++; /* set-up the boot environment */ ldc.lbh = lbh; ldc.bename = bename; ldc.snapname = snapname; ldc.depth = 0; ldc.depth_limit = depth; /* the boot environment will be cloned from this dataset */ parent_hdl = zfs_open(lbh->lzh, parentname, ZFS_TYPE_DATASET); /* create the boot environment */ err = be_clone_cb(parent_hdl, &ldc); free(parentname); return (set_error(lbh, err)); } /* * Create a boot environment from pre-existing snapshot, specifying a depth. */ int be_create_depth(libbe_handle_t *lbh, const char *bename, const char *snap, int depth) { return (be_clone(lbh, bename, snap, depth)); } /* * Create the boot environment from pre-existing snapshot */ int be_create_from_existing_snap(libbe_handle_t *lbh, const char *bename, const char *snap) { return (be_clone(lbh, bename, snap, -1)); } /* * Create a boot environment from an existing boot environment */ int be_create_from_existing(libbe_handle_t *lbh, const char *bename, const char *old) { int err; char snap[BE_MAXPATHLEN]; if ((err = be_snapshot(lbh, old, NULL, true, snap)) != 0) return (set_error(lbh, err)); err = be_clone(lbh, bename, snap, -1); return (set_error(lbh, err)); } /* * Verifies that a snapshot has a valid name, exists, and has a mountpoint of * '/'. Returns BE_ERR_SUCCESS (0), upon success, or the relevant BE_ERR_* upon * failure. Does not set the internal library error state. */ int be_validate_snap(libbe_handle_t *lbh, const char *snap_name) { if (strlen(snap_name) >= BE_MAXPATHLEN) return (BE_ERR_PATHLEN); if (!zfs_name_valid(snap_name, ZFS_TYPE_SNAPSHOT)) return (BE_ERR_INVALIDNAME); if (!zfs_dataset_exists(lbh->lzh, snap_name, ZFS_TYPE_SNAPSHOT)) return (BE_ERR_NOENT); return (BE_ERR_SUCCESS); } /* * Idempotently appends the name argument to the root boot environment path * and copies the resulting string into the result buffer (which is assumed * to be at least BE_MAXPATHLEN characters long. Returns BE_ERR_SUCCESS upon * success, BE_ERR_PATHLEN if the resulting path is longer than BE_MAXPATHLEN, * or BE_ERR_INVALIDNAME if the name is a path that does not begin with * zfs_be_root. Does not set internal library error state. */ int be_root_concat(libbe_handle_t *lbh, const char *name, char *result) { size_t name_len, root_len; name_len = strlen(name); root_len = strlen(lbh->root); /* Act idempotently; return be name if it is already a full path */ if (strrchr(name, '/') != NULL) { if (strstr(name, lbh->root) != name) return (BE_ERR_INVALIDNAME); if (name_len >= BE_MAXPATHLEN) return (BE_ERR_PATHLEN); strlcpy(result, name, BE_MAXPATHLEN); return (BE_ERR_SUCCESS); } else if (name_len + root_len + 1 < BE_MAXPATHLEN) { snprintf(result, BE_MAXPATHLEN, "%s/%s", lbh->root, name); return (BE_ERR_SUCCESS); } return (BE_ERR_PATHLEN); } /* * Verifies the validity of a boot environment name (A-Za-z0-9-_.). Returns * BE_ERR_SUCCESS (0) if name is valid, otherwise returns BE_ERR_INVALIDNAME * or BE_ERR_PATHLEN. * Does not set internal library error state. 
*/ int be_validate_name(libbe_handle_t *lbh, const char *name) { /* * Impose the additional restriction that the entire dataset name must * not exceed the maximum length of a dataset, i.e. MAXNAMELEN. */ if (strlen(lbh->root) + 1 + strlen(name) > MAXNAMELEN) return (BE_ERR_PATHLEN); if (!zfs_name_valid(name, ZFS_TYPE_DATASET)) return (BE_ERR_INVALIDNAME); return (BE_ERR_SUCCESS); } /* * usage */ int be_rename(libbe_handle_t *lbh, const char *old, const char *new) { char full_old[BE_MAXPATHLEN]; char full_new[BE_MAXPATHLEN]; zfs_handle_t *zfs_hdl; int err; /* * be_validate_name is documented not to set error state, so we should * do so here. */ if ((err = be_validate_name(lbh, new)) != 0) return (set_error(lbh, err)); if ((err = be_root_concat(lbh, old, full_old)) != 0) return (set_error(lbh, err)); if ((err = be_root_concat(lbh, new, full_new)) != 0) return (set_error(lbh, err)); if (!zfs_dataset_exists(lbh->lzh, full_old, ZFS_TYPE_DATASET)) return (set_error(lbh, BE_ERR_NOENT)); if (zfs_dataset_exists(lbh->lzh, full_new, ZFS_TYPE_DATASET)) return (set_error(lbh, BE_ERR_EXISTS)); if ((zfs_hdl = zfs_open(lbh->lzh, full_old, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); /* recurse, nounmount, forceunmount */ struct renameflags flags = { .nounmount = 1, }; err = zfs_rename(zfs_hdl, NULL, full_new, flags); zfs_close(zfs_hdl); if (err != 0) return (set_error(lbh, BE_ERR_UNKNOWN)); return (0); } int be_export(libbe_handle_t *lbh, const char *bootenv, int fd) { char snap_name[BE_MAXPATHLEN]; char buf[BE_MAXPATHLEN]; zfs_handle_t *zfs; + sendflags_t flags = { 0 }; int err; if ((err = be_snapshot(lbh, bootenv, NULL, true, snap_name)) != 0) /* Use the error set by be_snapshot */ return (err); be_root_concat(lbh, snap_name, buf); if ((zfs = zfs_open(lbh->lzh, buf, ZFS_TYPE_DATASET)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); - err = zfs_send_one(zfs, NULL, fd, 0); + err = zfs_send_one(zfs, NULL, fd, flags); zfs_close(zfs); return (err); } int be_import(libbe_handle_t *lbh, const char *bootenv, int fd) { char buf[BE_MAXPATHLEN]; nvlist_t *props; zfs_handle_t *zfs; recvflags_t flags = { .nomount = 1 }; int err; be_root_concat(lbh, bootenv, buf); if ((err = zfs_receive(lbh->lzh, buf, NULL, &flags, fd, NULL)) != 0) { switch (err) { case EINVAL: return (set_error(lbh, BE_ERR_NOORIGIN)); case ENOENT: return (set_error(lbh, BE_ERR_NOENT)); case EIO: return (set_error(lbh, BE_ERR_IO)); default: return (set_error(lbh, BE_ERR_UNKNOWN)); } } if ((zfs = zfs_open(lbh->lzh, buf, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "noauto"); nvlist_add_string(props, "mountpoint", "none"); err = zfs_prop_set_list(zfs, props); nvlist_free(props); zfs_close(zfs); if (err != 0) return (set_error(lbh, BE_ERR_UNKNOWN)); return (0); } #if SOON static int be_create_child_noent(libbe_handle_t *lbh, const char *active, const char *child_path) { nvlist_t *props; zfs_handle_t *zfs; int err; nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "noauto"); nvlist_add_string(props, "mountpoint", child_path); /* Create */ if ((err = zfs_create(lbh->lzh, active, ZFS_TYPE_DATASET, props)) != 0) { switch (err) { case EZFS_EXISTS: return (set_error(lbh, BE_ERR_EXISTS)); case EZFS_NOENT: return (set_error(lbh, BE_ERR_NOENT)); case EZFS_BADTYPE: case EZFS_BADVERSION: return (set_error(lbh, BE_ERR_NOPOOL)); case EZFS_BADPROP: default: /* We set something up wrong, 
probably... */ return (set_error(lbh, BE_ERR_UNKNOWN)); } } nvlist_free(props); if ((zfs = zfs_open(lbh->lzh, active, ZFS_TYPE_DATASET)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); /* Set props */ if ((err = zfs_prop_set(zfs, "canmount", "noauto")) != 0) { zfs_close(zfs); /* * Similar to other cases, this shouldn't fail unless we've * done something wrong. This is a new dataset that shouldn't * have been mounted anywhere between creation and now. */ if (err == EZFS_NOMEM) return (set_error(lbh, BE_ERR_NOMEM)); return (set_error(lbh, BE_ERR_UNKNOWN)); } zfs_close(zfs); return (BE_ERR_SUCCESS); } static int be_create_child_cloned(libbe_handle_t *lbh, const char *active) { char buf[BE_MAXPATHLEN], tmp[BE_MAXPATHLEN];; zfs_handle_t *zfs; int err; /* XXX TODO ? */ /* * Establish if the existing path is a zfs dataset or just * the subdirectory of one */ strlcpy(tmp, "tmp/be_snap.XXXXX", sizeof(tmp)); if (mktemp(tmp) == NULL) return (set_error(lbh, BE_ERR_UNKNOWN)); be_root_concat(lbh, tmp, buf); printf("Here %s?\n", buf); if ((err = zfs_snapshot(lbh->lzh, buf, false, NULL)) != 0) { switch (err) { case EZFS_INVALIDNAME: return (set_error(lbh, BE_ERR_INVALIDNAME)); default: /* * The other errors that zfs_ioc_snapshot might return * shouldn't happen if we've set things up properly, so * we'll gloss over them and call it UNKNOWN as it will * require further triage. */ if (errno == ENOTSUP) return (set_error(lbh, BE_ERR_NOPOOL)); return (set_error(lbh, BE_ERR_UNKNOWN)); } } /* Clone */ if ((zfs = zfs_open(lbh->lzh, buf, ZFS_TYPE_SNAPSHOT)) == NULL) return (BE_ERR_ZFSOPEN); if ((err = zfs_clone(zfs, active, NULL)) != 0) /* XXX TODO correct error */ return (set_error(lbh, BE_ERR_UNKNOWN)); /* set props */ zfs_close(zfs); return (BE_ERR_SUCCESS); } int be_add_child(libbe_handle_t *lbh, const char *child_path, bool cp_if_exists) { struct stat sb; char active[BE_MAXPATHLEN], buf[BE_MAXPATHLEN]; nvlist_t *props; const char *s; /* Require absolute paths */ if (*child_path != '/') return (set_error(lbh, BE_ERR_BADPATH)); strlcpy(active, be_active_path(lbh), BE_MAXPATHLEN); strcpy(buf, active); /* Create non-mountable parent dataset(s) */ s = child_path; for (char *p; (p = strchr(s+1, '/')) != NULL; s = p) { size_t len = p - s; strncat(buf, s, len); nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP); nvlist_add_string(props, "canmount", "off"); nvlist_add_string(props, "mountpoint", "none"); zfs_create(lbh->lzh, buf, ZFS_TYPE_DATASET, props); nvlist_free(props); } /* Path does not exist as a descendent of / yet */ if (strlcat(active, child_path, BE_MAXPATHLEN) >= BE_MAXPATHLEN) return (set_error(lbh, BE_ERR_PATHLEN)); if (stat(child_path, &sb) != 0) { /* Verify that error is ENOENT */ if (errno != ENOENT) return (set_error(lbh, BE_ERR_UNKNOWN)); return (be_create_child_noent(lbh, active, child_path)); } else if (cp_if_exists) /* Path is already a descendent of / and should be copied */ return (be_create_child_cloned(lbh, active)); return (set_error(lbh, BE_ERR_EXISTS)); } #endif /* SOON */ static int be_set_nextboot(libbe_handle_t *lbh, nvlist_t *config, uint64_t pool_guid, const char *zfsdev) { nvlist_t **child; uint64_t vdev_guid; int c, children; if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; ++c) if (be_set_nextboot(lbh, child[c], pool_guid, zfsdev) != 0) return (1); return (0); } if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) != 0) { return (1); } if (zpool_nextboot(lbh->lzh, pool_guid, vdev_guid, zfsdev) != 0) { 
perror("ZFS_IOC_NEXTBOOT failed"); return (1); } return (0); } /* * Deactivate old BE dataset; currently just sets canmount=noauto */ static int be_deactivate(libbe_handle_t *lbh, const char *ds) { zfs_handle_t *zfs; if ((zfs = zfs_open(lbh->lzh, ds, ZFS_TYPE_DATASET)) == NULL) return (1); if (zfs_prop_set(zfs, "canmount", "noauto") != 0) return (1); zfs_close(zfs); return (0); } int be_activate(libbe_handle_t *lbh, const char *bootenv, bool temporary) { char be_path[BE_MAXPATHLEN]; char buf[BE_MAXPATHLEN]; nvlist_t *config, *dsprops, *vdevs; char *origin; uint64_t pool_guid; zfs_handle_t *zhp; int err; be_root_concat(lbh, bootenv, be_path); /* Note: be_exists fails if mountpoint is not / */ if ((err = be_exists(lbh, be_path)) != 0) return (set_error(lbh, err)); if (temporary) { config = zpool_get_config(lbh->active_phandle, NULL); if (config == NULL) /* config should be fetchable... */ return (set_error(lbh, BE_ERR_UNKNOWN)); if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) /* Similarly, it shouldn't be possible */ return (set_error(lbh, BE_ERR_UNKNOWN)); /* Expected format according to zfsbootcfg(8) man */ snprintf(buf, sizeof(buf), "zfs:%s:", be_path); /* We have no config tree */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdevs) != 0) return (set_error(lbh, BE_ERR_NOPOOL)); return (be_set_nextboot(lbh, vdevs, pool_guid, buf)); } else { if (be_deactivate(lbh, lbh->bootfs) != 0) return (-1); /* Obtain bootenv zpool */ err = zpool_set_prop(lbh->active_phandle, "bootfs", be_path); if (err) return (-1); zhp = zfs_open(lbh->lzh, be_path, ZFS_TYPE_FILESYSTEM); if (zhp == NULL) return (-1); if (be_prop_list_alloc(&dsprops) != 0) return (-1); if (be_get_dataset_props(lbh, be_path, dsprops) != 0) { nvlist_free(dsprops); return (-1); } if (nvlist_lookup_string(dsprops, "origin", &origin) == 0) err = zfs_promote(zhp); nvlist_free(dsprops); zfs_close(zhp); if (err) return (-1); } return (BE_ERR_SUCCESS); } Index: projects/clang900-import/lib/libc/gen/sysctlnametomib.c =================================================================== --- projects/clang900-import/lib/libc/gen/sysctlnametomib.c (revision 352536) +++ projects/clang900-import/lib/libc/gen/sysctlnametomib.c (revision 352537) @@ -1,57 +1,57 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2001 The FreeBSD Project. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE FREEBSD PROJECT BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include /* * This function uses a presently undocumented interface to the kernel * to walk the tree and get the type so it can print the value. * This interface is under work and consideration, and should probably * be killed with a big axe by the first person who can find the time. * (be aware though, that the proper interface isn't as obvious as it * may seem, there are various conflicting requirements. */ int sysctlnametomib(const char *name, int *mibp, size_t *sizep) { int oid[2]; int error; - oid[0] = 0; - oid[1] = 3; + oid[0] = CTL_SYSCTL; + oid[1] = CTL_SYSCTL_NAME2OID; *sizep *= sizeof(int); error = sysctl(oid, 2, mibp, sizep, name, strlen(name)); *sizep /= sizeof(int); return (error); } Index: projects/clang900-import/lib/libc/sys/open.2 =================================================================== --- projects/clang900-import/lib/libc/sys/open.2 (revision 352536) +++ projects/clang900-import/lib/libc/sys/open.2 (revision 352537) @@ -1,612 +1,617 @@ .\" Copyright (c) 1980, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)open.2 8.2 (Berkeley) 11/16/93 .\" $FreeBSD$ .\" -.Dd June 14, 2019 +.Dd September 17, 2019 .Dt OPEN 2 .Os .Sh NAME .Nm open , openat .Nd open or create a file for reading, writing or executing .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In fcntl.h .Ft int .Fn open "const char *path" "int flags" "..." .Ft int .Fn openat "int fd" "const char *path" "int flags" "..." 
.Sh DESCRIPTION The file name specified by .Fa path is opened for either execution or reading and/or writing as specified by the argument .Fa flags and the file descriptor returned to the calling process. The .Fa flags argument may indicate the file is to be created if it does not exist (by specifying the .Dv O_CREAT flag). In this case .Fn open and .Fn openat require an additional argument .Fa "mode_t mode" , and the file is created with mode .Fa mode as described in .Xr chmod 2 and modified by the process' umask value (see .Xr umask 2 ) . .Pp The .Fn openat function is equivalent to the .Fn open function except in the case where the .Fa path specifies a relative path, or the .Dv O_BENEATH flag is provided. For .Fn openat and relative .Fa path , the file to be opened is determined relative to the directory associated with the file descriptor .Fa fd instead of the current working directory. The .Fa flag parameter and the optional fourth parameter correspond exactly to the parameters of .Fn open . If .Fn openat is passed the special value .Dv AT_FDCWD in the .Fa fd parameter, the current working directory is used and the behavior is identical to a call to .Fn open . .Pp When .Fn openat is called with an absolute .Fa path without the .Dv O_BENEATH flag, it ignores the .Fa fd argument. When .Dv O_BENEATH is specified with an absolute .Fa path , a directory passed by the .Fa fd argument is used as the topping point for the resolution. See the definition of the .Dv O_BENEATH flag below. .Pp In .Xr capsicum 4 capability mode, .Fn open is not permitted. The .Fa path argument to .Fn openat must be strictly relative to a file descriptor .Fa fd , as defined in .Pa sys/kern/vfs_lookup.c . .Fa path must not be an absolute path and must not contain ".." components which cause the path resolution to escape the directory hierarchy starting at .Fa fd . Additionally, no symbolic link in .Fa path may target absolute path or contain escaping ".." components. .Fa fd must not be .Dv AT_FDCWD . .Pp If the .Dv vfs.lookup_cap_dotdot .Xr sysctl 3 MIB is set to zero, ".." components in the paths, used in capability mode, or with the .Dv O_BENEATH flag, are completely disabled. If the .Dv vfs.lookup_cap_dotdot_nonlocal MIB is set to zero, ".." is not allowed if found on non-local filesystem. .Pp The flags specified are formed by .Em or Ns 'ing the following values .Pp .Bd -literal -offset indent -compact O_RDONLY open for reading only O_WRONLY open for writing only O_RDWR open for reading and writing O_EXEC open for execute only O_NONBLOCK do not block on open O_APPEND append on each write O_CREAT create file if it does not exist O_TRUNC truncate size to 0 O_EXCL error if create and file exists O_SHLOCK atomically obtain a shared lock O_EXLOCK atomically obtain an exclusive lock O_DIRECT eliminate or reduce cache effects O_FSYNC synchronous writes O_SYNC synchronous writes O_NOFOLLOW do not follow symlinks O_NOCTTY ignored O_TTY_INIT ignored O_DIRECTORY error if file is not a directory O_CLOEXEC set FD_CLOEXEC upon open O_VERIFY verify the contents of the file O_BENEATH require path to be strictly relative to topping directory .Ed .Pp Opening a file with .Dv O_APPEND set causes each write on the file to be appended to the end. If .Dv O_TRUNC is specified and the file exists, the file is truncated to zero length. If .Dv O_EXCL is set with .Dv O_CREAT and the file already exists, .Fn open returns an error. This may be used to implement a simple exclusive access locking mechanism. 
If .Dv O_EXCL is set and the last component of the pathname is a symbolic link, .Fn open will fail even if the symbolic link points to a non-existent name. If the .Dv O_NONBLOCK flag is specified and the .Fn open system call would result in the process being blocked for some reason (e.g., waiting for carrier on a dialup line), .Fn open returns immediately. The descriptor remains in non-blocking mode for subsequent operations. .Pp If .Dv O_FSYNC is used in the mask, all writes will immediately and synchronously be written to disk. .Pp .Dv O_SYNC is a synonym for .Dv O_FSYNC required by .Tn POSIX . .Pp If .Dv O_NOFOLLOW is used in the mask and the target file passed to .Fn open is a symbolic link then the .Fn open will fail. .Pp When opening a file, a lock with .Xr flock 2 semantics can be obtained by setting .Dv O_SHLOCK for a shared lock, or .Dv O_EXLOCK for an exclusive lock. If creating a file with .Dv O_CREAT , the request for the lock will never fail (provided that the underlying file system supports locking). .Pp .Dv O_DIRECT may be used to minimize or eliminate the cache effects of reading and writing. The system will attempt to avoid caching the data you read or write. If it cannot avoid caching the data, it will minimize the impact the data has on the cache. Use of this flag can drastically reduce performance if not used with care. .Pp .Dv O_NOCTTY may be used to ensure the OS does not assign this file as the controlling terminal when it opens a tty device. This is the default on .Fx , but is present for .Tn POSIX compatibility. The .Fn open system call will not assign controlling terminals on .Fx . .Pp .Dv O_TTY_INIT may be used to ensure the OS restores the terminal attributes when initially opening a TTY. This is the default on .Fx , but is present for .Tn POSIX compatibility. The initial call to .Fn open on a TTY will always restore default terminal attributes on .Fx . .Pp .Dv O_DIRECTORY may be used to ensure the resulting file descriptor refers to a directory. This flag can be used to prevent applications with elevated privileges from opening files which are even unsafe to open with .Dv O_RDONLY , such as device nodes. .Pp .Dv O_CLOEXEC may be used to set .Dv FD_CLOEXEC flag for the newly returned file descriptor. .Pp .Dv O_VERIFY may be used to indicate to the kernel that the contents of the file should be verified before allowing the open to proceed. The details of what .Dq verified means is implementation specific. The run-time linker (rtld) uses this flag to ensure shared objects have been verified before operating on them. .Pp .Dv O_BENEATH returns .Er ENOTCAPABLE if the specified relative path, after resolving all symlinks and ".." references, does not reside in the directory hierarchy of children beneath the topping directory. Topping directory is the process current directory if relative .Fa path is used for .Fn open , and the directory referenced by the .Fa fd argument when using .Fn openat . If the specified path is absolute, .Dv O_BENEATH allows arbitrary prefix that ends up at the topping directory, after which all further resolved components must be under it. .Pp If successful, .Fn open returns a non-negative integer, termed a file descriptor. It returns \-1 on failure. The file pointer used to mark the current position within the file is set to the beginning of the file. .Pp If a sleeping open of a device node from .Xr devfs 5 is interrupted by a signal, the call always fails with .Er EINTR , even if the .Dv SA_RESTART flag is set for the signal. 
A sleeping open of a fifo (see .Xr mkfifo 2 ) is restarted as normal. .Pp When a new file is created it is given the group of the directory which contains it. .Pp Unless .Dv O_CLOEXEC flag was specified, the new descriptor is set to remain open across .Xr execve 2 system calls; see .Xr close 2 , .Xr fcntl 2 and .Dv O_CLOEXEC description. .Pp The system imposes a limit on the number of file descriptors open simultaneously by one process. The .Xr getdtablesize 2 system call returns the current system limit. .Sh RETURN VALUES If successful, .Fn open and .Fn openat return a non-negative integer, termed a file descriptor. They return \-1 on failure, and set .Va errno to indicate the error. .Sh ERRORS The named file is opened unless: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. .It Bq Er ENOENT .Dv O_CREAT is not set and the named file does not exist. .It Bq Er ENOENT A component of the path name that must exist does not exist. .It Bq Er EACCES Search permission is denied for a component of the path prefix. .It Bq Er EACCES The required permissions (for reading and/or writing) are denied for the given flags. .It Bq Er EACCES .Dv O_TRUNC is specified and write permission is denied. .It Bq Er EACCES .Dv O_CREAT is specified, the file does not exist, and the directory in which it is to be created does not permit writing. .It Bq Er EPERM .Dv O_CREAT is specified, the file does not exist, and the directory in which it is to be created has its immutable flag set, see the .Xr chflags 2 manual page for more information. .It Bq Er EPERM The named file has its immutable flag set and the file is to be modified. .It Bq Er EPERM The named file has its append-only flag set, the file is to be modified, and .Dv O_TRUNC is specified or .Dv O_APPEND is not specified. .It Bq Er ELOOP Too many symbolic links were encountered in translating the pathname. .It Bq Er EISDIR The named file is a directory, and the arguments specify it is to be modified. +.It Bq Er EISDIR +The named file is a directory, and the flags specified +.Dv O_CREAT +without +.Dv O_DIRECTORY . .It Bq Er EROFS The named file resides on a read-only file system, and the file is to be modified. .It Bq Er EROFS .Dv O_CREAT is specified and the named file would reside on a read-only file system. .It Bq Er EMFILE The process has already reached its limit for open file descriptors. .It Bq Er ENFILE The system file table is full. .It Bq Er EMLINK .Dv O_NOFOLLOW was specified and the target is a symbolic link. .It Bq Er ENXIO The named file is a character special or block special file, and the device associated with this special file does not exist. .It Bq Er ENXIO .Dv O_NONBLOCK is set, the named file is a fifo, .Dv O_WRONLY is set, and no process has the file open for reading. .It Bq Er EINTR The .Fn open operation was interrupted by a signal. .It Bq Er EOPNOTSUPP .Dv O_SHLOCK or .Dv O_EXLOCK is specified but the underlying file system does not support locking. .It Bq Er EOPNOTSUPP The named file is a special file mounted through a file system that does not support access to it (e.g.\& NFS). .It Bq Er EWOULDBLOCK .Dv O_NONBLOCK and one of .Dv O_SHLOCK or .Dv O_EXLOCK is specified and the file is locked. 
.It Bq Er ENOSPC .Dv O_CREAT is specified, the file does not exist, and the directory in which the entry for the new file is being placed cannot be extended because there is no space left on the file system containing the directory. .It Bq Er ENOSPC .Dv O_CREAT is specified, the file does not exist, and there are no free inodes on the file system on which the file is being created. .It Bq Er EDQUOT .Dv O_CREAT is specified, the file does not exist, and the directory in which the entry for the new file is being placed cannot be extended because the user's quota of disk blocks on the file system containing the directory has been exhausted. .It Bq Er EDQUOT .Dv O_CREAT is specified, the file does not exist, and the user's quota of inodes on the file system on which the file is being created has been exhausted. .It Bq Er EIO An I/O error occurred while making the directory entry or allocating the inode for .Dv O_CREAT . .It Bq Er ETXTBSY The file is a pure procedure (shared text) file that is being executed and the .Fn open system call requests write access. .It Bq Er EFAULT The .Fa path argument points outside the process's allocated address space. .It Bq Er EEXIST .Dv O_CREAT and .Dv O_EXCL were specified and the file exists. .It Bq Er EOPNOTSUPP An attempt was made to open a socket (not currently implemented). .It Bq Er EINVAL An attempt was made to open a descriptor with an illegal combination of .Dv O_RDONLY , .Dv O_WRONLY , .Dv O_RDWR and .Dv O_EXEC . .It Bq Er EBADF The .Fa path argument does not specify an absolute path and the .Fa fd argument is neither .Dv AT_FDCWD nor a valid file descriptor open for searching. .It Bq Er ENOTDIR The .Fa path argument is not an absolute path and .Fa fd is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. .It Bq Er ENOTDIR .Dv O_DIRECTORY is specified and the file is not a directory. .It Bq Er ECAPMODE .Dv AT_FDCWD is specified and the process is in capability mode. .It Bq Er ECAPMODE .Fn open was called and the process is in capability mode. .It Bq Er ENOTCAPABLE .Fa path is an absolute path, or contained a ".." component leading to a directory outside of the directory hierarchy specified by .Fa fd , and the process is in capability mode. .It Bq Er ENOTCAPABLE The .Dv O_BENEATH flag was provided, and the absolute .Fa path does not have its tail fully contained under the topping directory, or the relative .Fa path escapes it. .El .Sh SEE ALSO .Xr chmod 2 , .Xr close 2 , .Xr dup 2 , .Xr fexecve 2 , .Xr fhopen 2 , .Xr getdtablesize 2 , .Xr getfh 2 , .Xr lgetfh 2 , .Xr lseek 2 , .Xr read 2 , .Xr umask 2 , .Xr write 2 , .Xr fopen 3 , .Xr capsicum 4 .Sh STANDARDS These functions are specified by .St -p1003.1-2008 . .Fx sets .Va errno to .Er EMLINK instead of .Er ELOOP as specified by .Tn POSIX when .Dv O_NOFOLLOW is set in flags and the final component of pathname is a symbolic link to distinguish it from the case of too many symbolic link traversals in one of its non-final components. .Sh HISTORY The .Fn open function appeared in .At v1 . The .Fn openat function was introduced in .Fx 8.0 . .Sh BUGS The Open Group Extended API Set 2 specification requires that the test for whether .Fa fd is searchable is based on whether .Fa fd is open for searching, not whether the underlying directory currently permits searches. The present implementation of the .Fa openat checks the current permissions of directory instead. 
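The capability-mode restrictions summarized in the DESCRIPTION and ERRORS sections amount to the pattern sketched below: directory descriptors are obtained before cap_enter(), and all later lookups are strictly relative to them. The paths are chosen only for illustration.

#include <sys/capsicum.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	int dfd, fd;

	/* Obtain directory descriptors before entering capability mode. */
	dfd = open("/etc", O_DIRECTORY | O_RDONLY);
	if (dfd == -1)
		err(1, "open /etc");

	if (cap_enter() == -1)
		err(1, "cap_enter");

	/* open(2) itself is rejected in capability mode. */
	if (open("/etc/hosts", O_RDONLY) == -1 && errno == ECAPMODE)
		warnx("open(2) is not permitted in capability mode");

	/* A strictly relative lookup below dfd is permitted. */
	fd = openat(dfd, "hosts", O_RDONLY);
	if (fd == -1)
		err(1, "openat");

	close(fd);
	close(dfd);
	return (0);
}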
Index: projects/clang900-import/lib/libpmc/libpmc.c =================================================================== --- projects/clang900-import/lib/libpmc/libpmc.c (revision 352536) +++ projects/clang900-import/lib/libpmc/libpmc.c (revision 352537) @@ -1,1875 +1,1894 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libpmcinternal.h" /* Function prototypes */ #if defined(__amd64__) || defined(__i386__) static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__amd64__) || defined(__i386__) static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__arm__) #if defined(__XSCALE__) static int xscale_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif static int armv7_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__aarch64__) static int arm64_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif #if defined(__mips__) static int mips_allocate_pmc(enum pmc_event _pe, char* ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif /* __mips__ */ static int soft_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #if defined(__powerpc__) static int powerpc_allocate_pmc(enum pmc_event _pe, char* ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif /* __powerpc__ */ #define PMC_CALL(cmd, params) \ syscall(pmc_syscall, PMC_OP_##cmd, (params)) /* * Event aliases provide a way for the user to ask for generic events * like "cache-misses", or "instructions-retired". These aliases are * mapped to the appropriate canonical event descriptions using a * lookup table. 
*/ struct pmc_event_alias { const char *pm_alias; const char *pm_spec; }; static const struct pmc_event_alias *pmc_mdep_event_aliases; /* * The pmc_event_descr structure maps symbolic names known to the user * to integer codes used by the PMC KLD. */ struct pmc_event_descr { const char *pm_ev_name; enum pmc_event pm_ev_code; }; /* * The pmc_class_descr structure maps class name prefixes for * event names to event tables and other PMC class data. */ struct pmc_class_descr { const char *pm_evc_name; size_t pm_evc_name_size; enum pmc_class pm_evc_class; const struct pmc_event_descr *pm_evc_event_table; size_t pm_evc_event_table_size; int (*pm_evc_allocate_pmc)(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pa); }; #define PMC_TABLE_SIZE(N) (sizeof(N)/sizeof(N[0])) #define PMC_EVENT_TABLE_SIZE(N) PMC_TABLE_SIZE(N##_event_table) #undef __PMC_EV #define __PMC_EV(C,N) { #N, PMC_EV_ ## C ## _ ## N }, /* * PMC_CLASSDEP_TABLE(NAME, CLASS) * * Define a table mapping event names and aliases to HWPMC event IDs. */ #define PMC_CLASSDEP_TABLE(N, C) \ static const struct pmc_event_descr N##_event_table[] = \ { \ __PMC_EV_##C() \ } PMC_CLASSDEP_TABLE(iaf, IAF); PMC_CLASSDEP_TABLE(k8, K8); PMC_CLASSDEP_TABLE(xscale, XSCALE); PMC_CLASSDEP_TABLE(armv7, ARMV7); PMC_CLASSDEP_TABLE(armv8, ARMV8); +PMC_CLASSDEP_TABLE(beri, BERI); PMC_CLASSDEP_TABLE(mips24k, MIPS24K); PMC_CLASSDEP_TABLE(mips74k, MIPS74K); PMC_CLASSDEP_TABLE(octeon, OCTEON); PMC_CLASSDEP_TABLE(ppc7450, PPC7450); PMC_CLASSDEP_TABLE(ppc970, PPC970); PMC_CLASSDEP_TABLE(e500, E500); static struct pmc_event_descr soft_event_table[PMC_EV_DYN_COUNT]; #undef __PMC_EV_ALIAS #define __PMC_EV_ALIAS(N,CODE) { N, PMC_EV_##CODE }, static const struct pmc_event_descr cortex_a8_event_table[] = { __PMC_EV_ALIAS_ARMV7_CORTEX_A8() }; static const struct pmc_event_descr cortex_a9_event_table[] = { __PMC_EV_ALIAS_ARMV7_CORTEX_A9() }; static const struct pmc_event_descr cortex_a53_event_table[] = { __PMC_EV_ALIAS_ARMV8_CORTEX_A53() }; static const struct pmc_event_descr cortex_a57_event_table[] = { __PMC_EV_ALIAS_ARMV8_CORTEX_A57() }; /* * PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...) * * Map a CPU to the PMC classes it supports. */ #define PMC_MDEP_TABLE(N,C,...) 
\ static const enum pmc_class N##_pmc_classes[] = { \ PMC_CLASS_##C, __VA_ARGS__ \ } PMC_MDEP_TABLE(k8, K8, PMC_CLASS_SOFT, PMC_CLASS_TSC); PMC_MDEP_TABLE(xscale, XSCALE, PMC_CLASS_SOFT, PMC_CLASS_XSCALE); +PMC_MDEP_TABLE(beri, BERI, PMC_CLASS_SOFT, PMC_CLASS_BERI); PMC_MDEP_TABLE(cortex_a8, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); PMC_MDEP_TABLE(cortex_a9, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); PMC_MDEP_TABLE(cortex_a53, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8); PMC_MDEP_TABLE(cortex_a57, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8); PMC_MDEP_TABLE(mips24k, MIPS24K, PMC_CLASS_SOFT, PMC_CLASS_MIPS24K); PMC_MDEP_TABLE(mips74k, MIPS74K, PMC_CLASS_SOFT, PMC_CLASS_MIPS74K); PMC_MDEP_TABLE(octeon, OCTEON, PMC_CLASS_SOFT, PMC_CLASS_OCTEON); PMC_MDEP_TABLE(ppc7450, PPC7450, PMC_CLASS_SOFT, PMC_CLASS_PPC7450, PMC_CLASS_TSC); PMC_MDEP_TABLE(ppc970, PPC970, PMC_CLASS_SOFT, PMC_CLASS_PPC970, PMC_CLASS_TSC); PMC_MDEP_TABLE(e500, E500, PMC_CLASS_SOFT, PMC_CLASS_E500, PMC_CLASS_TSC); PMC_MDEP_TABLE(generic, SOFT, PMC_CLASS_SOFT); static const struct pmc_event_descr tsc_event_table[] = { __PMC_EV_TSC() }; #undef PMC_CLASS_TABLE_DESC #define PMC_CLASS_TABLE_DESC(NAME, CLASS, EVENTS, ALLOCATOR) \ static const struct pmc_class_descr NAME##_class_table_descr = \ { \ .pm_evc_name = #CLASS "-", \ .pm_evc_name_size = sizeof(#CLASS "-") - 1, \ .pm_evc_class = PMC_CLASS_##CLASS , \ .pm_evc_event_table = EVENTS##_event_table , \ .pm_evc_event_table_size = \ PMC_EVENT_TABLE_SIZE(EVENTS), \ .pm_evc_allocate_pmc = ALLOCATOR##_allocate_pmc \ } #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(k8, K8, k8, k8); #endif #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc); #endif #if defined(__arm__) #if defined(__XSCALE__) PMC_CLASS_TABLE_DESC(xscale, XSCALE, xscale, xscale); #endif PMC_CLASS_TABLE_DESC(cortex_a8, ARMV7, cortex_a8, armv7); PMC_CLASS_TABLE_DESC(cortex_a9, ARMV7, cortex_a9, armv7); #endif #if defined(__aarch64__) PMC_CLASS_TABLE_DESC(cortex_a53, ARMV8, cortex_a53, arm64); PMC_CLASS_TABLE_DESC(cortex_a57, ARMV8, cortex_a57, arm64); #endif #if defined(__mips__) +PMC_CLASS_TABLE_DESC(beri, BERI, beri, mips); PMC_CLASS_TABLE_DESC(mips24k, MIPS24K, mips24k, mips); PMC_CLASS_TABLE_DESC(mips74k, MIPS74K, mips74k, mips); PMC_CLASS_TABLE_DESC(octeon, OCTEON, octeon, mips); #endif /* __mips__ */ #if defined(__powerpc__) PMC_CLASS_TABLE_DESC(ppc7450, PPC7450, ppc7450, powerpc); PMC_CLASS_TABLE_DESC(ppc970, PPC970, ppc970, powerpc); PMC_CLASS_TABLE_DESC(e500, E500, e500, powerpc); #endif static struct pmc_class_descr soft_class_table_descr = { .pm_evc_name = "SOFT-", .pm_evc_name_size = sizeof("SOFT-") - 1, .pm_evc_class = PMC_CLASS_SOFT, .pm_evc_event_table = NULL, .pm_evc_event_table_size = 0, .pm_evc_allocate_pmc = soft_allocate_pmc }; #undef PMC_CLASS_TABLE_DESC static const struct pmc_class_descr **pmc_class_table; #define PMC_CLASS_TABLE_SIZE cpu_info.pm_nclass static const enum pmc_class *pmc_mdep_class_list; static size_t pmc_mdep_class_list_size; /* * Mapping tables, mapping enumeration values to human readable * strings. 
*/ static const char * pmc_capability_names[] = { #undef __PMC_CAP #define __PMC_CAP(N,V,D) #N , __PMC_CAPS() }; struct pmc_class_map { enum pmc_class pm_class; const char *pm_name; }; static const struct pmc_class_map pmc_class_names[] = { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) { .pm_class = PMC_CLASS_##S, .pm_name = #S } , __PMC_CLASSES() }; struct pmc_cputype_map { enum pmc_cputype pm_cputype; const char *pm_name; }; static const struct pmc_cputype_map pmc_cputype_names[] = { #undef __PMC_CPU #define __PMC_CPU(S, V, D) { .pm_cputype = PMC_CPU_##S, .pm_name = #S } , __PMC_CPUS() }; static const char * pmc_disposition_names[] = { #undef __PMC_DISP #define __PMC_DISP(D) #D , __PMC_DISPOSITIONS() }; static const char * pmc_mode_names[] = { #undef __PMC_MODE #define __PMC_MODE(M,N) #M , __PMC_MODES() }; static const char * pmc_state_names[] = { #undef __PMC_STATE #define __PMC_STATE(S) #S , __PMC_STATES() }; /* * Filled in by pmc_init(). */ static int pmc_syscall = -1; static struct pmc_cpuinfo cpu_info; static struct pmc_op_getdyneventinfo soft_event_info; /* Event masks for events */ struct pmc_masks { const char *pm_name; const uint64_t pm_value; }; #define PMCMASK(N,V) { .pm_name = #N, .pm_value = (V) } #define NULLMASK { .pm_name = NULL } #if defined(__amd64__) || defined(__i386__) static int pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint64_t *evmask) { const struct pmc_masks *pm; char *q, *r; int c; if (pmask == NULL) /* no mask keywords */ return (-1); q = strchr(p, '='); /* skip '=' */ if (*++q == '\0') /* no more data */ return (-1); c = 0; /* count of mask keywords seen */ while ((r = strsep(&q, "+")) != NULL) { for (pm = pmask; pm->pm_name && strcasecmp(r, pm->pm_name); pm++) ; if (pm->pm_name == NULL) /* not found */ return (-1); *evmask |= pm->pm_value; c++; } return (c); } #endif #define KWMATCH(p,kw) (strcasecmp((p), (kw)) == 0) #define KWPREFIXMATCH(p,kw) (strncasecmp((p), (kw), sizeof((kw)) - 1) == 0) #define EV_ALIAS(N,S) { .pm_alias = N, .pm_spec = S } #if defined(__amd64__) || defined(__i386__) /* * AMD K8 PMCs. 
* */ static struct pmc_event_alias k8_aliases[] = { EV_ALIAS("branches", "k8-fr-retired-taken-branches"), EV_ALIAS("branch-mispredicts", "k8-fr-retired-taken-branches-mispredicted"), EV_ALIAS("cycles", "tsc"), EV_ALIAS("dc-misses", "k8-dc-miss"), EV_ALIAS("ic-misses", "k8-ic-miss"), EV_ALIAS("instructions", "k8-fr-retired-x86-instructions"), EV_ALIAS("interrupts", "k8-fr-taken-hardware-interrupts"), EV_ALIAS("unhalted-cycles", "k8-bu-cpu-clk-unhalted"), EV_ALIAS(NULL, NULL) }; #define __K8MASK(N,V) PMCMASK(N,(1 << (V))) /* * Parsing tables */ /* fp dispatched fpu ops */ static const struct pmc_masks k8_mask_fdfo[] = { __K8MASK(add-pipe-excluding-junk-ops, 0), __K8MASK(multiply-pipe-excluding-junk-ops, 1), __K8MASK(store-pipe-excluding-junk-ops, 2), __K8MASK(add-pipe-junk-ops, 3), __K8MASK(multiply-pipe-junk-ops, 4), __K8MASK(store-pipe-junk-ops, 5), NULLMASK }; /* ls segment register loads */ static const struct pmc_masks k8_mask_lsrl[] = { __K8MASK(es, 0), __K8MASK(cs, 1), __K8MASK(ss, 2), __K8MASK(ds, 3), __K8MASK(fs, 4), __K8MASK(gs, 5), __K8MASK(hs, 6), NULLMASK }; /* ls locked operation */ static const struct pmc_masks k8_mask_llo[] = { __K8MASK(locked-instructions, 0), __K8MASK(cycles-in-request, 1), __K8MASK(cycles-to-complete, 2), NULLMASK }; /* dc refill from {l2,system} and dc copyback */ static const struct pmc_masks k8_mask_dc[] = { __K8MASK(invalid, 0), __K8MASK(shared, 1), __K8MASK(exclusive, 2), __K8MASK(owner, 3), __K8MASK(modified, 4), NULLMASK }; /* dc one bit ecc error */ static const struct pmc_masks k8_mask_dobee[] = { __K8MASK(scrubber, 0), __K8MASK(piggyback, 1), NULLMASK }; /* dc dispatched prefetch instructions */ static const struct pmc_masks k8_mask_ddpi[] = { __K8MASK(load, 0), __K8MASK(store, 1), __K8MASK(nta, 2), NULLMASK }; /* dc dcache accesses by locks */ static const struct pmc_masks k8_mask_dabl[] = { __K8MASK(accesses, 0), __K8MASK(misses, 1), NULLMASK }; /* bu internal l2 request */ static const struct pmc_masks k8_mask_bilr[] = { __K8MASK(ic-fill, 0), __K8MASK(dc-fill, 1), __K8MASK(tlb-reload, 2), __K8MASK(tag-snoop, 3), __K8MASK(cancelled, 4), NULLMASK }; /* bu fill request l2 miss */ static const struct pmc_masks k8_mask_bfrlm[] = { __K8MASK(ic-fill, 0), __K8MASK(dc-fill, 1), __K8MASK(tlb-reload, 2), NULLMASK }; /* bu fill into l2 */ static const struct pmc_masks k8_mask_bfil[] = { __K8MASK(dirty-l2-victim, 0), __K8MASK(victim-from-l2, 1), NULLMASK }; /* fr retired fpu instructions */ static const struct pmc_masks k8_mask_frfi[] = { __K8MASK(x87, 0), __K8MASK(mmx-3dnow, 1), __K8MASK(packed-sse-sse2, 2), __K8MASK(scalar-sse-sse2, 3), NULLMASK }; /* fr retired fastpath double op instructions */ static const struct pmc_masks k8_mask_frfdoi[] = { __K8MASK(low-op-pos-0, 0), __K8MASK(low-op-pos-1, 1), __K8MASK(low-op-pos-2, 2), NULLMASK }; /* fr fpu exceptions */ static const struct pmc_masks k8_mask_ffe[] = { __K8MASK(x87-reclass-microfaults, 0), __K8MASK(sse-retype-microfaults, 1), __K8MASK(sse-reclass-microfaults, 2), __K8MASK(sse-and-x87-microtraps, 3), NULLMASK }; /* nb memory controller page access event */ static const struct pmc_masks k8_mask_nmcpae[] = { __K8MASK(page-hit, 0), __K8MASK(page-miss, 1), __K8MASK(page-conflict, 2), NULLMASK }; /* nb memory controller turnaround */ static const struct pmc_masks k8_mask_nmct[] = { __K8MASK(dimm-turnaround, 0), __K8MASK(read-to-write-turnaround, 1), __K8MASK(write-to-read-turnaround, 2), NULLMASK }; /* nb memory controller bypass saturation */ static const struct pmc_masks k8_mask_nmcbs[] = { 
__K8MASK(memory-controller-hi-pri-bypass, 0), __K8MASK(memory-controller-lo-pri-bypass, 1), __K8MASK(dram-controller-interface-bypass, 2), __K8MASK(dram-controller-queue-bypass, 3), NULLMASK }; /* nb sized commands */ static const struct pmc_masks k8_mask_nsc[] = { __K8MASK(nonpostwrszbyte, 0), __K8MASK(nonpostwrszdword, 1), __K8MASK(postwrszbyte, 2), __K8MASK(postwrszdword, 3), __K8MASK(rdszbyte, 4), __K8MASK(rdszdword, 5), __K8MASK(rdmodwr, 6), NULLMASK }; /* nb probe result */ static const struct pmc_masks k8_mask_npr[] = { __K8MASK(probe-miss, 0), __K8MASK(probe-hit, 1), __K8MASK(probe-hit-dirty-no-memory-cancel, 2), __K8MASK(probe-hit-dirty-with-memory-cancel, 3), NULLMASK }; /* nb hypertransport bus bandwidth */ static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */ __K8MASK(command, 0), __K8MASK(data, 1), __K8MASK(buffer-release, 2), __K8MASK(nop, 3), NULLMASK }; #undef __K8MASK #define K8_KW_COUNT "count" #define K8_KW_EDGE "edge" #define K8_KW_INV "inv" #define K8_KW_MASK "mask" #define K8_KW_OS "os" #define K8_KW_USR "usr" static int k8_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *e, *p, *q; int n; uint32_t count; uint64_t evmask; const struct pmc_masks *pm, *pmask; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); pmc_config->pm_md.pm_amd.pm_amd_config = 0; pmask = NULL; evmask = 0; #define __K8SETMASK(M) pmask = k8_mask_##M /* setup parsing tables */ switch (pe) { case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: __K8SETMASK(fdfo); break; case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD: __K8SETMASK(lsrl); break; case PMC_EV_K8_LS_LOCKED_OPERATION: __K8SETMASK(llo); break; case PMC_EV_K8_DC_REFILL_FROM_L2: case PMC_EV_K8_DC_REFILL_FROM_SYSTEM: case PMC_EV_K8_DC_COPYBACK: __K8SETMASK(dc); break; case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR: __K8SETMASK(dobee); break; case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS: __K8SETMASK(ddpi); break; case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: __K8SETMASK(dabl); break; case PMC_EV_K8_BU_INTERNAL_L2_REQUEST: __K8SETMASK(bilr); break; case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS: __K8SETMASK(bfrlm); break; case PMC_EV_K8_BU_FILL_INTO_L2: __K8SETMASK(bfil); break; case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: __K8SETMASK(frfi); break; case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: __K8SETMASK(frfdoi); break; case PMC_EV_K8_FR_FPU_EXCEPTIONS: __K8SETMASK(ffe); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT: __K8SETMASK(nmcpae); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND: __K8SETMASK(nmct); break; case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION: __K8SETMASK(nmcbs); break; case PMC_EV_K8_NB_SIZED_COMMANDS: __K8SETMASK(nsc); break; case PMC_EV_K8_NB_PROBE_RESULT: __K8SETMASK(npr); break; case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH: case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH: case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH: __K8SETMASK(nhbb); break; default: break; /* no options defined */ } while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) { q = strchr(p, '='); if (*++q == '\0') /* skip '=' */ return (-1); count = strtol(q, &e, 0); if (e == q || *e != '\0') return (-1); pmc_config->pm_caps |= PMC_CAP_THRESHOLD; pmc_config->pm_md.pm_amd.pm_amd_config |= AMD_PMC_TO_COUNTER(count); } else if (KWMATCH(p, K8_KW_EDGE)) { pmc_config->pm_caps |= PMC_CAP_EDGE; } else if (KWMATCH(p, K8_KW_INV)) { pmc_config->pm_caps |= PMC_CAP_INVERT; } else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) { if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0) return (-1); pmc_config->pm_caps |= 
PMC_CAP_QUALIFIER; } else if (KWMATCH(p, K8_KW_OS)) { pmc_config->pm_caps |= PMC_CAP_SYSTEM; } else if (KWMATCH(p, K8_KW_USR)) { pmc_config->pm_caps |= PMC_CAP_USER; } else return (-1); } /* other post processing */ switch (pe) { case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED: case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS: case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: case PMC_EV_K8_FR_FPU_EXCEPTIONS: /* XXX only available in rev B and later */ break; case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: /* XXX only available in rev C and later */ break; case PMC_EV_K8_LS_LOCKED_OPERATION: /* XXX CPU Rev A,B evmask is to be zero */ if (evmask & (evmask - 1)) /* > 1 bit set */ return (-1); if (evmask == 0) { evmask = 0x01; /* Rev C and later: #instrs */ pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } break; default: if (evmask == 0 && pmask != NULL) { for (pm = pmask; pm->pm_name; pm++) evmask |= pm->pm_value; pmc_config->pm_caps |= PMC_CAP_QUALIFIER; } } if (pmc_config->pm_caps & PMC_CAP_QUALIFIER) pmc_config->pm_md.pm_amd.pm_amd_config = AMD_PMC_TO_UNITMASK(evmask); return (0); } #endif #if defined(__i386__) || defined(__amd64__) static int tsc_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { if (pe != PMC_EV_TSC_TSC) return (-1); /* TSC events must be unqualified. */ if (ctrspec && *ctrspec != '\0') return (-1); pmc_config->pm_md.pm_amd.pm_amd_config = 0; pmc_config->pm_caps |= PMC_CAP_READ; return (0); } #endif static struct pmc_event_alias generic_aliases[] = { EV_ALIAS("instructions", "SOFT-CLOCK.HARD"), EV_ALIAS(NULL, NULL) }; static int soft_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { (void)ctrspec; (void)pmc_config; if ((int)pe < PMC_EV_SOFT_FIRST || (int)pe > PMC_EV_SOFT_LAST) return (-1); pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); return (0); } #if defined(__arm__) #if defined(__XSCALE__) static struct pmc_event_alias xscale_aliases[] = { EV_ALIAS("branches", "BRANCH_RETIRED"), EV_ALIAS("branch-mispredicts", "BRANCH_MISPRED"), EV_ALIAS("dc-misses", "DC_MISS"), EV_ALIAS("ic-misses", "IC_MISS"), EV_ALIAS("instructions", "INSTR_RETIRED"), EV_ALIAS(NULL, NULL) }; static int xscale_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { switch (pe) { default: break; } return (0); } #endif static struct pmc_event_alias cortex_a8_aliases[] = { EV_ALIAS("dc-misses", "L1_DCACHE_REFILL"), EV_ALIAS("ic-misses", "L1_ICACHE_REFILL"), EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias cortex_a9_aliases[] = { EV_ALIAS("dc-misses", "L1_DCACHE_REFILL"), EV_ALIAS("ic-misses", "L1_ICACHE_REFILL"), EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS(NULL, NULL) }; static int armv7_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { switch (pe) { default: break; } return (0); } #endif #if defined(__aarch64__) static struct pmc_event_alias cortex_a53_aliases[] = { EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias cortex_a57_aliases[] = { EV_ALIAS(NULL, NULL) }; static int arm64_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { switch (pe) { default: break; } return (0); } #endif #if defined(__mips__) +static struct pmc_event_alias beri_aliases[] = { + EV_ALIAS("instructions", "INST"), + EV_ALIAS(NULL, NULL) +}; + 
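/*
 * A minimal usage sketch (illustrative, not part of this file): a libpmc
 * consumer asks for the generic "instructions" alias, and pmc_allocate()
 * resolves it through the per-CPU alias table, e.g. to the BERI "INST"
 * event via beri_aliases above.  PMC_CPU_ANY is used on the assumption of
 * a process-scope counting PMC, and pid 0 denotes the current process.
 *
 *	pmc_id_t pmcid;
 *	pmc_value_t v;
 *
 *	if (pmc_init() < 0)
 *		err(1, "pmc_init");
 *	if (pmc_allocate("instructions", PMC_MODE_TC, 0, PMC_CPU_ANY,
 *	    &pmcid, 0) < 0)
 *		err(1, "pmc_allocate");
 *	if (pmc_attach(pmcid, 0) < 0)
 *		err(1, "pmc_attach");
 *	if (pmc_start(pmcid) < 0)
 *		err(1, "pmc_start");
 *	... run the workload of interest, then read the counter ...
 *	if (pmc_read(pmcid, &v) < 0)
 *		err(1, "pmc_read");
 */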
static struct pmc_event_alias mips24k_aliases[] = { EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS("branches", "BRANCH_COMPLETED"), EV_ALIAS("branch-mispredicts", "BRANCH_MISPRED"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias mips74k_aliases[] = { EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS("branches", "BRANCH_INSNS"), EV_ALIAS("branch-mispredicts", "MISPREDICTED_BRANCH_INSNS"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias octeon_aliases[] = { EV_ALIAS("instructions", "RET"), EV_ALIAS("branches", "BR"), EV_ALIAS("branch-mispredicts", "BRMIS"), EV_ALIAS(NULL, NULL) }; #define MIPS_KW_OS "os" #define MIPS_KW_USR "usr" #define MIPS_KW_ANYTHREAD "anythread" static int mips_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { char *p; (void) pe; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWMATCH(p, MIPS_KW_OS)) pmc_config->pm_caps |= PMC_CAP_SYSTEM; else if (KWMATCH(p, MIPS_KW_USR)) pmc_config->pm_caps |= PMC_CAP_USER; else if (KWMATCH(p, MIPS_KW_ANYTHREAD)) pmc_config->pm_caps |= (PMC_CAP_USER | PMC_CAP_SYSTEM); else return (-1); } return (0); } #endif /* __mips__ */ #if defined(__powerpc__) static struct pmc_event_alias ppc7450_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("branches", "BRANCHES_COMPLETED"), EV_ALIAS("branch-mispredicts", "MISPREDICTED_BRANCHES"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias ppc970_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("cycles", "CYCLES"), EV_ALIAS(NULL, NULL) }; static struct pmc_event_alias e500_aliases[] = { EV_ALIAS("instructions", "INSTR_COMPLETED"), EV_ALIAS("cycles", "CYCLES"), EV_ALIAS(NULL, NULL) }; #define POWERPC_KW_OS "os" #define POWERPC_KW_USR "usr" #define POWERPC_KW_ANYTHREAD "anythread" static int powerpc_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, struct pmc_op_pmcallocate *pmc_config __unused) { char *p; (void) pe; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); while ((p = strsep(&ctrspec, ",")) != NULL) { if (KWMATCH(p, POWERPC_KW_OS)) pmc_config->pm_caps |= PMC_CAP_SYSTEM; else if (KWMATCH(p, POWERPC_KW_USR)) pmc_config->pm_caps |= PMC_CAP_USER; else if (KWMATCH(p, POWERPC_KW_ANYTHREAD)) pmc_config->pm_caps |= (PMC_CAP_USER | PMC_CAP_SYSTEM); else return (-1); } return (0); } #endif /* __powerpc__ */ /* * Match an event name `name' with its canonical form. * * Matches are case insensitive and spaces, periods, underscores and * hyphen characters are considered to match each other. * * Returns 1 for a match, 0 otherwise. */ static int pmc_match_event_name(const char *name, const char *canonicalname) { int cc, nc; const unsigned char *c, *n; c = (const unsigned char *) canonicalname; n = (const unsigned char *) name; for (; (nc = *n) && (cc = *c); n++, c++) { if ((nc == ' ' || nc == '_' || nc == '-' || nc == '.') && (cc == ' ' || cc == '_' || cc == '-' || cc == '.')) continue; if (toupper(nc) == toupper(cc)) continue; return (0); } if (*n == '\0' && *c == '\0') return (1); return (0); } /* * Match an event name against all the event named supported by a * PMC class. * * Returns an event descriptor pointer on match or NULL otherwise. 
*/ static const struct pmc_event_descr * pmc_match_event_class(const char *name, const struct pmc_class_descr *pcd) { size_t n; const struct pmc_event_descr *ev; ev = pcd->pm_evc_event_table; for (n = 0; n < pcd->pm_evc_event_table_size; n++, ev++) if (pmc_match_event_name(name, ev->pm_ev_name)) return (ev); return (NULL); } static int pmc_mdep_is_compatible_class(enum pmc_class pc) { size_t n; for (n = 0; n < pmc_mdep_class_list_size; n++) if (pmc_mdep_class_list[n] == pc) return (1); return (0); } /* * API entry points */ int pmc_allocate(const char *ctrspec, enum pmc_mode mode, uint32_t flags, int cpu, pmc_id_t *pmcid, uint64_t count) { size_t n; int retval; char *r, *spec_copy; const char *ctrname; const struct pmc_event_descr *ev; const struct pmc_event_alias *alias; struct pmc_op_pmcallocate pmc_config; const struct pmc_class_descr *pcd; spec_copy = NULL; retval = -1; if (mode != PMC_MODE_SS && mode != PMC_MODE_TS && mode != PMC_MODE_SC && mode != PMC_MODE_TC) { errno = EINVAL; goto out; } bzero(&pmc_config, sizeof(pmc_config)); pmc_config.pm_cpu = cpu; pmc_config.pm_mode = mode; pmc_config.pm_flags = flags; pmc_config.pm_count = count; if (PMC_IS_SAMPLING_MODE(mode)) pmc_config.pm_caps |= PMC_CAP_INTERRUPT; /* * Can we pull this straight from the pmu table? */ r = spec_copy = strdup(ctrspec); ctrname = strsep(&r, ","); if (pmc_pmu_enabled()) { if (pmc_pmu_pmcallocate(ctrname, &pmc_config) == 0) { if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0) { goto out; } retval = 0; *pmcid = pmc_config.pm_pmcid; goto out; } errx(EX_USAGE, "ERROR: pmc_pmu_allocate failed, check for ctrname %s\n", ctrname); } else { free(spec_copy); spec_copy = NULL; } /* replace an event alias with the canonical event specifier */ if (pmc_mdep_event_aliases) for (alias = pmc_mdep_event_aliases; alias->pm_alias; alias++) if (!strcasecmp(ctrspec, alias->pm_alias)) { spec_copy = strdup(alias->pm_spec); break; } if (spec_copy == NULL) spec_copy = strdup(ctrspec); r = spec_copy; ctrname = strsep(&r, ","); /* * If a explicit class prefix was given by the user, restrict the * search for the event to the specified PMC class. */ ev = NULL; for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) { pcd = pmc_class_table[n]; if (pcd && pmc_mdep_is_compatible_class(pcd->pm_evc_class) && strncasecmp(ctrname, pcd->pm_evc_name, pcd->pm_evc_name_size) == 0) { if ((ev = pmc_match_event_class(ctrname + pcd->pm_evc_name_size, pcd)) == NULL) { errno = EINVAL; goto out; } break; } } /* * Otherwise, search for this event in all compatible PMC * classes. 
*/ for (n = 0; ev == NULL && n < PMC_CLASS_TABLE_SIZE; n++) { pcd = pmc_class_table[n]; if (pcd && pmc_mdep_is_compatible_class(pcd->pm_evc_class)) ev = pmc_match_event_class(ctrname, pcd); } if (ev == NULL) { errno = EINVAL; goto out; } pmc_config.pm_ev = ev->pm_ev_code; pmc_config.pm_class = pcd->pm_evc_class; if (pcd->pm_evc_allocate_pmc(ev->pm_ev_code, r, &pmc_config) < 0) { errno = EINVAL; goto out; } if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0) goto out; *pmcid = pmc_config.pm_pmcid; retval = 0; out: if (spec_copy) free(spec_copy); return (retval); } int pmc_attach(pmc_id_t pmc, pid_t pid) { struct pmc_op_pmcattach pmc_attach_args; pmc_attach_args.pm_pmc = pmc; pmc_attach_args.pm_pid = pid; return (PMC_CALL(PMCATTACH, &pmc_attach_args)); } int pmc_capabilities(pmc_id_t pmcid, uint32_t *caps) { unsigned int i; enum pmc_class cl; cl = PMC_ID_TO_CLASS(pmcid); for (i = 0; i < cpu_info.pm_nclass; i++) if (cpu_info.pm_classes[i].pm_class == cl) { *caps = cpu_info.pm_classes[i].pm_caps; return (0); } errno = EINVAL; return (-1); } int pmc_configure_logfile(int fd) { struct pmc_op_configurelog cla; cla.pm_logfd = fd; if (PMC_CALL(CONFIGURELOG, &cla) < 0) return (-1); return (0); } int pmc_cpuinfo(const struct pmc_cpuinfo **pci) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } *pci = &cpu_info; return (0); } int pmc_detach(pmc_id_t pmc, pid_t pid) { struct pmc_op_pmcattach pmc_detach_args; pmc_detach_args.pm_pmc = pmc; pmc_detach_args.pm_pid = pid; return (PMC_CALL(PMCDETACH, &pmc_detach_args)); } int pmc_disable(int cpu, int pmc) { struct pmc_op_pmcadmin ssa; ssa.pm_cpu = cpu; ssa.pm_pmc = pmc; ssa.pm_state = PMC_STATE_DISABLED; return (PMC_CALL(PMCADMIN, &ssa)); } int pmc_enable(int cpu, int pmc) { struct pmc_op_pmcadmin ssa; ssa.pm_cpu = cpu; ssa.pm_pmc = pmc; ssa.pm_state = PMC_STATE_FREE; return (PMC_CALL(PMCADMIN, &ssa)); } /* * Return a list of events known to a given PMC class. 'cl' is the * PMC class identifier, 'eventnames' is the returned list of 'const * char *' pointers pointing to the names of the events. 'nevents' is * the number of event name pointers returned. * * The space for 'eventnames' is allocated using malloc(3). The caller * is responsible for freeing this space when done. 
*/ int pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, int *nevents) { int count; const char **names; const struct pmc_event_descr *ev; switch (cl) { case PMC_CLASS_IAF: ev = iaf_event_table; count = PMC_EVENT_TABLE_SIZE(iaf); break; case PMC_CLASS_TSC: ev = tsc_event_table; count = PMC_EVENT_TABLE_SIZE(tsc); break; case PMC_CLASS_K8: ev = k8_event_table; count = PMC_EVENT_TABLE_SIZE(k8); break; case PMC_CLASS_XSCALE: ev = xscale_event_table; count = PMC_EVENT_TABLE_SIZE(xscale); break; case PMC_CLASS_ARMV7: switch (cpu_info.pm_cputype) { default: case PMC_CPU_ARMV7_CORTEX_A8: ev = cortex_a8_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a8); break; case PMC_CPU_ARMV7_CORTEX_A9: ev = cortex_a9_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a9); break; } break; case PMC_CLASS_ARMV8: switch (cpu_info.pm_cputype) { default: case PMC_CPU_ARMV8_CORTEX_A53: ev = cortex_a53_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a53); break; case PMC_CPU_ARMV8_CORTEX_A57: ev = cortex_a57_event_table; count = PMC_EVENT_TABLE_SIZE(cortex_a57); break; } break; + case PMC_CLASS_BERI: + ev = beri_event_table; + count = PMC_EVENT_TABLE_SIZE(beri); + break; case PMC_CLASS_MIPS24K: ev = mips24k_event_table; count = PMC_EVENT_TABLE_SIZE(mips24k); break; case PMC_CLASS_MIPS74K: ev = mips74k_event_table; count = PMC_EVENT_TABLE_SIZE(mips74k); break; case PMC_CLASS_OCTEON: ev = octeon_event_table; count = PMC_EVENT_TABLE_SIZE(octeon); break; case PMC_CLASS_PPC7450: ev = ppc7450_event_table; count = PMC_EVENT_TABLE_SIZE(ppc7450); break; case PMC_CLASS_PPC970: ev = ppc970_event_table; count = PMC_EVENT_TABLE_SIZE(ppc970); break; case PMC_CLASS_E500: ev = e500_event_table; count = PMC_EVENT_TABLE_SIZE(e500); break; case PMC_CLASS_SOFT: ev = soft_event_table; count = soft_event_info.pm_nevent; break; default: errno = EINVAL; return (-1); } if ((names = malloc(count * sizeof(const char *))) == NULL) return (-1); *eventnames = names; *nevents = count; for (;count--; ev++, names++) *names = ev->pm_ev_name; return (0); } int pmc_flush_logfile(void) { return (PMC_CALL(FLUSHLOG,0)); } int pmc_close_logfile(void) { return (PMC_CALL(CLOSELOG,0)); } int pmc_get_driver_stats(struct pmc_driverstats *ds) { struct pmc_op_getdriverstats gms; if (PMC_CALL(GETDRIVERSTATS, &gms) < 0) return (-1); /* copy out fields in the current userland<->library interface */ ds->pm_intr_ignored = gms.pm_intr_ignored; ds->pm_intr_processed = gms.pm_intr_processed; ds->pm_intr_bufferfull = gms.pm_intr_bufferfull; ds->pm_syscalls = gms.pm_syscalls; ds->pm_syscall_errors = gms.pm_syscall_errors; ds->pm_buffer_requests = gms.pm_buffer_requests; ds->pm_buffer_requests_failed = gms.pm_buffer_requests_failed; ds->pm_log_sweeps = gms.pm_log_sweeps; return (0); } int pmc_get_msr(pmc_id_t pmc, uint32_t *msr) { struct pmc_op_getmsr gm; gm.pm_pmcid = pmc; if (PMC_CALL(PMCGETMSR, &gm) < 0) return (-1); *msr = gm.pm_msr; return (0); } int pmc_init(void) { int error, pmc_mod_id; unsigned int n; uint32_t abi_version; struct module_stat pmc_modstat; struct pmc_op_getcpuinfo op_cpu_info; #if defined(__amd64__) || defined(__i386__) int cpu_has_iaf_counters; unsigned int t; #endif if (pmc_syscall != -1) /* already inited */ return (0); /* retrieve the system call number from the KLD */ if ((pmc_mod_id = modfind(PMC_MODULE_NAME)) < 0) return (-1); pmc_modstat.version = sizeof(struct module_stat); if ((error = modstat(pmc_mod_id, &pmc_modstat)) < 0) return (-1); pmc_syscall = pmc_modstat.data.intval; /* check the kernel module's ABI against our 
compiled-in version */ abi_version = PMC_VERSION; if (PMC_CALL(GETMODULEVERSION, &abi_version) < 0) return (pmc_syscall = -1); /* ignore patch & minor numbers for the comparison */ if ((abi_version & 0xFF000000) != (PMC_VERSION & 0xFF000000)) { errno = EPROGMISMATCH; return (pmc_syscall = -1); } bzero(&op_cpu_info, sizeof(op_cpu_info)); if (PMC_CALL(GETCPUINFO, &op_cpu_info) < 0) return (pmc_syscall = -1); cpu_info.pm_cputype = op_cpu_info.pm_cputype; cpu_info.pm_ncpu = op_cpu_info.pm_ncpu; cpu_info.pm_npmc = op_cpu_info.pm_npmc; cpu_info.pm_nclass = op_cpu_info.pm_nclass; for (n = 0; n < op_cpu_info.pm_nclass; n++) memcpy(&cpu_info.pm_classes[n], &op_cpu_info.pm_classes[n], sizeof(cpu_info.pm_classes[n])); pmc_class_table = malloc(PMC_CLASS_TABLE_SIZE * sizeof(struct pmc_class_descr *)); if (pmc_class_table == NULL) return (-1); for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) pmc_class_table[n] = NULL; /* * Get soft events list. */ soft_event_info.pm_class = PMC_CLASS_SOFT; if (PMC_CALL(GETDYNEVENTINFO, &soft_event_info) < 0) return (pmc_syscall = -1); /* Map soft events to static list. */ for (n = 0; n < soft_event_info.pm_nevent; n++) { soft_event_table[n].pm_ev_name = soft_event_info.pm_events[n].pm_ev_name; soft_event_table[n].pm_ev_code = soft_event_info.pm_events[n].pm_ev_code; } soft_class_table_descr.pm_evc_event_table_size = \ soft_event_info.pm_nevent; soft_class_table_descr.pm_evc_event_table = \ soft_event_table; /* * Fill in the class table. */ n = 0; /* Fill soft events information. */ pmc_class_table[n++] = &soft_class_table_descr; #if defined(__amd64__) || defined(__i386__) if (cpu_info.pm_cputype != PMC_CPU_GENERIC) pmc_class_table[n++] = &tsc_class_table_descr; /* * Check if this CPU has fixed function counters. */ cpu_has_iaf_counters = 0; for (t = 0; t < cpu_info.pm_nclass; t++) if (cpu_info.pm_classes[t].pm_class == PMC_CLASS_IAF && cpu_info.pm_classes[t].pm_num > 0) cpu_has_iaf_counters = 1; #endif #define PMC_MDEP_INIT(C) do { \ pmc_mdep_event_aliases = C##_aliases; \ pmc_mdep_class_list = C##_pmc_classes; \ pmc_mdep_class_list_size = \ PMC_TABLE_SIZE(C##_pmc_classes); \ } while (0) #define PMC_MDEP_INIT_INTEL_V2(C) do { \ PMC_MDEP_INIT(C); \ pmc_class_table[n++] = &iaf_class_table_descr; \ if (!cpu_has_iaf_counters) \ pmc_mdep_event_aliases = \ C##_aliases_without_iaf; \ pmc_class_table[n] = &C##_class_table_descr; \ } while (0) /* Configure the event name parser. 
*/ switch (cpu_info.pm_cputype) { #if defined(__amd64__) || defined(__i386__) case PMC_CPU_AMD_K8: PMC_MDEP_INIT(k8); pmc_class_table[n] = &k8_class_table_descr; break; #endif case PMC_CPU_GENERIC: PMC_MDEP_INIT(generic); break; #if defined(__arm__) #if defined(__XSCALE__) case PMC_CPU_INTEL_XSCALE: PMC_MDEP_INIT(xscale); pmc_class_table[n] = &xscale_class_table_descr; break; #endif case PMC_CPU_ARMV7_CORTEX_A8: PMC_MDEP_INIT(cortex_a8); pmc_class_table[n] = &cortex_a8_class_table_descr; break; case PMC_CPU_ARMV7_CORTEX_A9: PMC_MDEP_INIT(cortex_a9); pmc_class_table[n] = &cortex_a9_class_table_descr; break; #endif #if defined(__aarch64__) case PMC_CPU_ARMV8_CORTEX_A53: PMC_MDEP_INIT(cortex_a53); pmc_class_table[n] = &cortex_a53_class_table_descr; break; case PMC_CPU_ARMV8_CORTEX_A57: PMC_MDEP_INIT(cortex_a57); pmc_class_table[n] = &cortex_a57_class_table_descr; break; #endif #if defined(__mips__) + case PMC_CPU_MIPS_BERI: + PMC_MDEP_INIT(beri); + pmc_class_table[n] = &beri_class_table_descr; + break; case PMC_CPU_MIPS_24K: PMC_MDEP_INIT(mips24k); pmc_class_table[n] = &mips24k_class_table_descr; break; case PMC_CPU_MIPS_74K: PMC_MDEP_INIT(mips74k); pmc_class_table[n] = &mips74k_class_table_descr; break; case PMC_CPU_MIPS_OCTEON: PMC_MDEP_INIT(octeon); pmc_class_table[n] = &octeon_class_table_descr; break; #endif /* __mips__ */ #if defined(__powerpc__) case PMC_CPU_PPC_7450: PMC_MDEP_INIT(ppc7450); pmc_class_table[n] = &ppc7450_class_table_descr; break; case PMC_CPU_PPC_970: PMC_MDEP_INIT(ppc970); pmc_class_table[n] = &ppc970_class_table_descr; break; case PMC_CPU_PPC_E500: PMC_MDEP_INIT(e500); pmc_class_table[n] = &e500_class_table_descr; break; #endif default: /* * Some kind of CPU this version of the library knows nothing * about. This shouldn't happen since the abi version check * should have caught this. */ #if defined(__amd64__) || defined(__i386__) break; #endif errno = ENXIO; return (pmc_syscall = -1); } return (0); } const char * pmc_name_of_capability(enum pmc_caps cap) { int i; /* * 'cap' should have a single bit set and should be in * range. 
*/ if ((cap & (cap - 1)) || cap < PMC_CAP_FIRST || cap > PMC_CAP_LAST) { errno = EINVAL; return (NULL); } i = ffs(cap); return (pmc_capability_names[i - 1]); } const char * pmc_name_of_class(enum pmc_class pc) { size_t n; for (n = 0; n < PMC_TABLE_SIZE(pmc_class_names); n++) if (pc == pmc_class_names[n].pm_class) return (pmc_class_names[n].pm_name); errno = EINVAL; return (NULL); } const char * pmc_name_of_cputype(enum pmc_cputype cp) { size_t n; for (n = 0; n < PMC_TABLE_SIZE(pmc_cputype_names); n++) if (cp == pmc_cputype_names[n].pm_cputype) return (pmc_cputype_names[n].pm_name); errno = EINVAL; return (NULL); } const char * pmc_name_of_disposition(enum pmc_disp pd) { if ((int) pd >= PMC_DISP_FIRST && pd <= PMC_DISP_LAST) return (pmc_disposition_names[pd]); errno = EINVAL; return (NULL); } const char * _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) { const struct pmc_event_descr *ev, *evfence; ev = evfence = NULL; if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) { ev = k8_event_table; evfence = k8_event_table + PMC_EVENT_TABLE_SIZE(k8); } else if (pe >= PMC_EV_XSCALE_FIRST && pe <= PMC_EV_XSCALE_LAST) { ev = xscale_event_table; evfence = xscale_event_table + PMC_EVENT_TABLE_SIZE(xscale); } else if (pe >= PMC_EV_ARMV7_FIRST && pe <= PMC_EV_ARMV7_LAST) { switch (cpu) { case PMC_CPU_ARMV7_CORTEX_A8: ev = cortex_a8_event_table; evfence = cortex_a8_event_table + PMC_EVENT_TABLE_SIZE(cortex_a8); break; case PMC_CPU_ARMV7_CORTEX_A9: ev = cortex_a9_event_table; evfence = cortex_a9_event_table + PMC_EVENT_TABLE_SIZE(cortex_a9); break; default: /* Unknown CPU type. */ break; } } else if (pe >= PMC_EV_ARMV8_FIRST && pe <= PMC_EV_ARMV8_LAST) { switch (cpu) { case PMC_CPU_ARMV8_CORTEX_A53: ev = cortex_a53_event_table; evfence = cortex_a53_event_table + PMC_EVENT_TABLE_SIZE(cortex_a53); break; case PMC_CPU_ARMV8_CORTEX_A57: ev = cortex_a57_event_table; evfence = cortex_a57_event_table + PMC_EVENT_TABLE_SIZE(cortex_a57); break; default: /* Unknown CPU type. 
*/ break; } + } else if (pe >= PMC_EV_BERI_FIRST && pe <= PMC_EV_BERI_LAST) { + ev = beri_event_table; + evfence = beri_event_table + PMC_EVENT_TABLE_SIZE(beri); } else if (pe >= PMC_EV_MIPS24K_FIRST && pe <= PMC_EV_MIPS24K_LAST) { ev = mips24k_event_table; evfence = mips24k_event_table + PMC_EVENT_TABLE_SIZE(mips24k); } else if (pe >= PMC_EV_MIPS74K_FIRST && pe <= PMC_EV_MIPS74K_LAST) { ev = mips74k_event_table; evfence = mips74k_event_table + PMC_EVENT_TABLE_SIZE(mips74k); } else if (pe >= PMC_EV_OCTEON_FIRST && pe <= PMC_EV_OCTEON_LAST) { ev = octeon_event_table; evfence = octeon_event_table + PMC_EVENT_TABLE_SIZE(octeon); } else if (pe >= PMC_EV_PPC7450_FIRST && pe <= PMC_EV_PPC7450_LAST) { ev = ppc7450_event_table; evfence = ppc7450_event_table + PMC_EVENT_TABLE_SIZE(ppc7450); } else if (pe >= PMC_EV_PPC970_FIRST && pe <= PMC_EV_PPC970_LAST) { ev = ppc970_event_table; evfence = ppc970_event_table + PMC_EVENT_TABLE_SIZE(ppc970); } else if (pe >= PMC_EV_E500_FIRST && pe <= PMC_EV_E500_LAST) { ev = e500_event_table; evfence = e500_event_table + PMC_EVENT_TABLE_SIZE(e500); } else if (pe == PMC_EV_TSC_TSC) { ev = tsc_event_table; evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc); } else if ((int)pe >= PMC_EV_SOFT_FIRST && (int)pe <= PMC_EV_SOFT_LAST) { ev = soft_event_table; evfence = soft_event_table + soft_event_info.pm_nevent; } for (; ev != evfence; ev++) if (pe == ev->pm_ev_code) return (ev->pm_ev_name); return (NULL); } const char * pmc_name_of_event(enum pmc_event pe) { const char *n; if ((n = _pmc_name_of_event(pe, cpu_info.pm_cputype)) != NULL) return (n); errno = EINVAL; return (NULL); } const char * pmc_name_of_mode(enum pmc_mode pm) { if ((int) pm >= PMC_MODE_FIRST && pm <= PMC_MODE_LAST) return (pmc_mode_names[pm]); errno = EINVAL; return (NULL); } const char * pmc_name_of_state(enum pmc_state ps) { if ((int) ps >= PMC_STATE_FIRST && ps <= PMC_STATE_LAST) return (pmc_state_names[ps]); errno = EINVAL; return (NULL); } int pmc_ncpu(void) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } return (cpu_info.pm_ncpu); } int pmc_npmc(int cpu) { if (pmc_syscall == -1) { errno = ENXIO; return (-1); } if (cpu < 0 || cpu >= (int) cpu_info.pm_ncpu) { errno = EINVAL; return (-1); } return (cpu_info.pm_npmc); } int pmc_pmcinfo(int cpu, struct pmc_pmcinfo **ppmci) { int nbytes, npmc; struct pmc_op_getpmcinfo *pmci; if ((npmc = pmc_npmc(cpu)) < 0) return (-1); nbytes = sizeof(struct pmc_op_getpmcinfo) + npmc * sizeof(struct pmc_info); if ((pmci = calloc(1, nbytes)) == NULL) return (-1); pmci->pm_cpu = cpu; if (PMC_CALL(GETPMCINFO, pmci) < 0) { free(pmci); return (-1); } /* kernel<->library, library<->userland interfaces are identical */ *ppmci = (struct pmc_pmcinfo *) pmci; return (0); } int pmc_read(pmc_id_t pmc, pmc_value_t *value) { struct pmc_op_pmcrw pmc_read_op; pmc_read_op.pm_pmcid = pmc; pmc_read_op.pm_flags = PMC_F_OLDVALUE; pmc_read_op.pm_value = -1; if (PMC_CALL(PMCRW, &pmc_read_op) < 0) return (-1); *value = pmc_read_op.pm_value; return (0); } int pmc_release(pmc_id_t pmc) { struct pmc_op_simple pmc_release_args; pmc_release_args.pm_pmcid = pmc; return (PMC_CALL(PMCRELEASE, &pmc_release_args)); } int pmc_rw(pmc_id_t pmc, pmc_value_t newvalue, pmc_value_t *oldvaluep) { struct pmc_op_pmcrw pmc_rw_op; pmc_rw_op.pm_pmcid = pmc; pmc_rw_op.pm_flags = PMC_F_NEWVALUE | PMC_F_OLDVALUE; pmc_rw_op.pm_value = newvalue; if (PMC_CALL(PMCRW, &pmc_rw_op) < 0) return (-1); *oldvaluep = pmc_rw_op.pm_value; return (0); } int pmc_set(pmc_id_t pmc, pmc_value_t value) { struct 
pmc_op_pmcsetcount sc; sc.pm_pmcid = pmc; sc.pm_count = value; if (PMC_CALL(PMCSETCOUNT, &sc) < 0) return (-1); return (0); } int pmc_start(pmc_id_t pmc) { struct pmc_op_simple pmc_start_args; pmc_start_args.pm_pmcid = pmc; return (PMC_CALL(PMCSTART, &pmc_start_args)); } int pmc_stop(pmc_id_t pmc) { struct pmc_op_simple pmc_stop_args; pmc_stop_args.pm_pmcid = pmc; return (PMC_CALL(PMCSTOP, &pmc_stop_args)); } int pmc_width(pmc_id_t pmcid, uint32_t *width) { unsigned int i; enum pmc_class cl; cl = PMC_ID_TO_CLASS(pmcid); for (i = 0; i < cpu_info.pm_nclass; i++) if (cpu_info.pm_classes[i].pm_class == cl) { *width = cpu_info.pm_classes[i].pm_width; return (0); } errno = EINVAL; return (-1); } int pmc_write(pmc_id_t pmc, pmc_value_t value) { struct pmc_op_pmcrw pmc_write_op; pmc_write_op.pm_pmcid = pmc; pmc_write_op.pm_flags = PMC_F_NEWVALUE; pmc_write_op.pm_value = value; return (PMC_CALL(PMCRW, &pmc_write_op)); } int pmc_writelog(uint32_t userdata) { struct pmc_op_writelog wl; wl.pm_userdata = userdata; return (PMC_CALL(WRITELOG, &wl)); } Index: projects/clang900-import/sbin/ifconfig/ifmedia.c =================================================================== --- projects/clang900-import/sbin/ifconfig/ifmedia.c (revision 352536) +++ projects/clang900-import/sbin/ifconfig/ifmedia.c (revision 352537) @@ -1,793 +1,815 @@ /* $NetBSD: ifconfig.c,v 1.34 1997/04/21 01:17:58 lukem Exp $ */ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1997 Jason R. Thorpe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project * by Jason R. Thorpe. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include "ifconfig.h" static void domediaopt(const char *, int, int); static int get_media_subtype(int, const char *); static int get_media_mode(int, const char *); static int get_media_options(int, const char *); static int lookup_media_word(struct ifmedia_description *, const char *); static void print_media_word(int, int); static void print_media_word_ifconfig(int); static struct ifmedia_description *get_toptype_desc(int); static struct ifmedia_type_to_subtype *get_toptype_ttos(int); static struct ifmedia_description *get_subtype_desc(int, struct ifmedia_type_to_subtype *ttos); #define IFM_OPMODE(x) \ ((x) & (IFM_IEEE80211_ADHOC | IFM_IEEE80211_HOSTAP | \ IFM_IEEE80211_IBSS | IFM_IEEE80211_WDS | IFM_IEEE80211_MONITOR | \ IFM_IEEE80211_MBSS)) #define IFM_IEEE80211_STA 0 static void media_status(int s) { struct ifmediareq ifmr; + struct ifdownreason ifdr; int *media_list, i; - int xmedia = 1; + bool no_carrier, xmedia; (void) memset(&ifmr, 0, sizeof(ifmr)); (void) strlcpy(ifmr.ifm_name, name, sizeof(ifmr.ifm_name)); + xmedia = true; /* * Check if interface supports extended media types. */ if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)&ifmr) < 0) - xmedia = 0; - if (xmedia == 0 && ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) { + xmedia = false; + if (!xmedia && ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) { /* * Interface doesn't support SIOC{G,S}IFMEDIA. 
*/ return; } if (ifmr.ifm_count == 0) { warnx("%s: no media types?", name); return; } media_list = (int *)malloc(ifmr.ifm_count * sizeof(int)); if (media_list == NULL) err(1, "malloc"); ifmr.ifm_ulist = media_list; if (xmedia) { if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)&ifmr) < 0) err(1, "SIOCGIFXMEDIA"); } else { if (ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) err(1, "SIOCGIFMEDIA"); } printf("\tmedia: "); print_media_word(ifmr.ifm_current, 1); if (ifmr.ifm_active != ifmr.ifm_current) { putchar(' '); putchar('('); print_media_word(ifmr.ifm_active, 0); putchar(')'); } putchar('\n'); if (ifmr.ifm_status & IFM_AVALID) { + no_carrier = false; printf("\tstatus: "); switch (IFM_TYPE(ifmr.ifm_active)) { case IFM_ETHER: case IFM_ATM: if (ifmr.ifm_status & IFM_ACTIVE) printf("active"); else - printf("no carrier"); + no_carrier = true; break; case IFM_IEEE80211: if (ifmr.ifm_status & IFM_ACTIVE) { /* NB: only sta mode associates */ if (IFM_OPMODE(ifmr.ifm_active) == IFM_IEEE80211_STA) printf("associated"); else printf("running"); } else - printf("no carrier"); + no_carrier = true; break; + } + if (no_carrier) { + printf("no carrier"); + memset(&ifdr, 0, sizeof(ifdr)); + strlcpy(ifdr.ifdr_name, name, sizeof(ifdr.ifdr_name)); + if (ioctl(s, SIOCGIFDOWNREASON, (caddr_t)&ifdr) == 0) { + switch (ifdr.ifdr_reason) { + case IFDR_REASON_MSG: + printf(" (%s)", ifdr.ifdr_msg); + break; + case IFDR_REASON_VENDOR: + printf(" (vendor code %d)", + ifdr.ifdr_vendor); + break; + default: + break; + } + } } putchar('\n'); } if (ifmr.ifm_count > 0 && supmedia) { printf("\tsupported media:\n"); for (i = 0; i < ifmr.ifm_count; i++) { printf("\t\t"); print_media_word_ifconfig(media_list[i]); putchar('\n'); } } free(media_list); } struct ifmediareq * ifmedia_getstate(int s) { static struct ifmediareq *ifmr = NULL; int *mwords; int xmedia = 1; if (ifmr == NULL) { ifmr = (struct ifmediareq *)malloc(sizeof(struct ifmediareq)); if (ifmr == NULL) err(1, "malloc"); (void) memset(ifmr, 0, sizeof(struct ifmediareq)); (void) strlcpy(ifmr->ifm_name, name, sizeof(ifmr->ifm_name)); ifmr->ifm_count = 0; ifmr->ifm_ulist = NULL; /* * We must go through the motions of reading all * supported media because we need to know both * the current media type and the top-level type. */ if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)ifmr) < 0) { xmedia = 0; } if (xmedia == 0 && ioctl(s, SIOCGIFMEDIA, (caddr_t)ifmr) < 0) { err(1, "SIOCGIFMEDIA"); } if (ifmr->ifm_count == 0) errx(1, "%s: no media types?", name); mwords = (int *)malloc(ifmr->ifm_count * sizeof(int)); if (mwords == NULL) err(1, "malloc"); ifmr->ifm_ulist = mwords; if (xmedia) { if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)ifmr) < 0) err(1, "SIOCGIFXMEDIA"); } else { if (ioctl(s, SIOCGIFMEDIA, (caddr_t)ifmr) < 0) err(1, "SIOCGIFMEDIA"); } } return ifmr; } static void setifmediacallback(int s, void *arg) { struct ifmediareq *ifmr = (struct ifmediareq *)arg; static int did_it = 0; if (!did_it) { ifr.ifr_media = ifmr->ifm_current; if (ioctl(s, SIOCSIFMEDIA, (caddr_t)&ifr) < 0) err(1, "SIOCSIFMEDIA (media)"); free(ifmr->ifm_ulist); free(ifmr); did_it = 1; } } static void setmedia(const char *val, int d, int s, const struct afswtch *afp) { struct ifmediareq *ifmr; int subtype; ifmr = ifmedia_getstate(s); /* * We are primarily concerned with the top-level type. * However, "current" may be only IFM_NONE, so we just look * for the top-level type in the first "supported type" * entry. * * (I'm assuming that all supported media types for a given * interface will be the same top-level type..) 
*/ subtype = get_media_subtype(IFM_TYPE(ifmr->ifm_ulist[0]), val); strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); ifr.ifr_media = (ifmr->ifm_current & IFM_IMASK) | IFM_TYPE(ifmr->ifm_ulist[0]) | subtype; ifmr->ifm_current = ifr.ifr_media; callback_register(setifmediacallback, (void *)ifmr); } static void setmediaopt(const char *val, int d, int s, const struct afswtch *afp) { domediaopt(val, 0, s); } static void unsetmediaopt(const char *val, int d, int s, const struct afswtch *afp) { domediaopt(val, 1, s); } static void domediaopt(const char *val, int clear, int s) { struct ifmediareq *ifmr; int options; ifmr = ifmedia_getstate(s); options = get_media_options(IFM_TYPE(ifmr->ifm_ulist[0]), val); strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); ifr.ifr_media = ifmr->ifm_current; if (clear) ifr.ifr_media &= ~options; else { if (options & IFM_HDX) { ifr.ifr_media &= ~IFM_FDX; options &= ~IFM_HDX; } ifr.ifr_media |= options; } ifmr->ifm_current = ifr.ifr_media; callback_register(setifmediacallback, (void *)ifmr); } static void setmediainst(const char *val, int d, int s, const struct afswtch *afp) { struct ifmediareq *ifmr; int inst; ifmr = ifmedia_getstate(s); inst = atoi(val); if (inst < 0 || inst > (int)IFM_INST_MAX) errx(1, "invalid media instance: %s", val); strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); ifr.ifr_media = (ifmr->ifm_current & ~IFM_IMASK) | inst << IFM_ISHIFT; ifmr->ifm_current = ifr.ifr_media; callback_register(setifmediacallback, (void *)ifmr); } static void setmediamode(const char *val, int d, int s, const struct afswtch *afp) { struct ifmediareq *ifmr; int mode; ifmr = ifmedia_getstate(s); mode = get_media_mode(IFM_TYPE(ifmr->ifm_ulist[0]), val); strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); ifr.ifr_media = (ifmr->ifm_current & ~IFM_MMASK) | mode; ifmr->ifm_current = ifr.ifr_media; callback_register(setifmediacallback, (void *)ifmr); } /********************************************************************** * A good chunk of this is duplicated from sys/net/if_media.c **********************************************************************/ static struct ifmedia_description ifm_type_descriptions[] = IFM_TYPE_DESCRIPTIONS; static struct ifmedia_description ifm_subtype_ethernet_descriptions[] = IFM_SUBTYPE_ETHERNET_DESCRIPTIONS; static struct ifmedia_description ifm_subtype_ethernet_aliases[] = IFM_SUBTYPE_ETHERNET_ALIASES; static struct ifmedia_description ifm_subtype_ethernet_option_descriptions[] = IFM_SUBTYPE_ETHERNET_OPTION_DESCRIPTIONS; static struct ifmedia_description ifm_subtype_ieee80211_descriptions[] = IFM_SUBTYPE_IEEE80211_DESCRIPTIONS; static struct ifmedia_description ifm_subtype_ieee80211_aliases[] = IFM_SUBTYPE_IEEE80211_ALIASES; static struct ifmedia_description ifm_subtype_ieee80211_option_descriptions[] = IFM_SUBTYPE_IEEE80211_OPTION_DESCRIPTIONS; struct ifmedia_description ifm_subtype_ieee80211_mode_descriptions[] = IFM_SUBTYPE_IEEE80211_MODE_DESCRIPTIONS; struct ifmedia_description ifm_subtype_ieee80211_mode_aliases[] = IFM_SUBTYPE_IEEE80211_MODE_ALIASES; static struct ifmedia_description ifm_subtype_atm_descriptions[] = IFM_SUBTYPE_ATM_DESCRIPTIONS; static struct ifmedia_description ifm_subtype_atm_aliases[] = IFM_SUBTYPE_ATM_ALIASES; static struct ifmedia_description ifm_subtype_atm_option_descriptions[] = IFM_SUBTYPE_ATM_OPTION_DESCRIPTIONS; static struct ifmedia_description ifm_subtype_shared_descriptions[] = IFM_SUBTYPE_SHARED_DESCRIPTIONS; static struct ifmedia_description ifm_subtype_shared_aliases[] = IFM_SUBTYPE_SHARED_ALIASES; 
static struct ifmedia_description ifm_shared_option_descriptions[] = IFM_SHARED_OPTION_DESCRIPTIONS; static struct ifmedia_description ifm_shared_option_aliases[] = IFM_SHARED_OPTION_ALIASES; struct ifmedia_type_to_subtype { struct { struct ifmedia_description *desc; int alias; } subtypes[5]; struct { struct ifmedia_description *desc; int alias; } options[4]; struct { struct ifmedia_description *desc; int alias; } modes[3]; }; /* must be in the same order as IFM_TYPE_DESCRIPTIONS */ static struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[] = { { { { &ifm_subtype_shared_descriptions[0], 0 }, { &ifm_subtype_shared_aliases[0], 1 }, { &ifm_subtype_ethernet_descriptions[0], 0 }, { &ifm_subtype_ethernet_aliases[0], 1 }, { NULL, 0 }, }, { { &ifm_shared_option_descriptions[0], 0 }, { &ifm_shared_option_aliases[0], 1 }, { &ifm_subtype_ethernet_option_descriptions[0], 0 }, { NULL, 0 }, }, { { NULL, 0 }, }, }, { { { &ifm_subtype_shared_descriptions[0], 0 }, { &ifm_subtype_shared_aliases[0], 1 }, { &ifm_subtype_ieee80211_descriptions[0], 0 }, { &ifm_subtype_ieee80211_aliases[0], 1 }, { NULL, 0 }, }, { { &ifm_shared_option_descriptions[0], 0 }, { &ifm_shared_option_aliases[0], 1 }, { &ifm_subtype_ieee80211_option_descriptions[0], 0 }, { NULL, 0 }, }, { { &ifm_subtype_ieee80211_mode_descriptions[0], 0 }, { &ifm_subtype_ieee80211_mode_aliases[0], 0 }, { NULL, 0 }, }, }, { { { &ifm_subtype_shared_descriptions[0], 0 }, { &ifm_subtype_shared_aliases[0], 1 }, { &ifm_subtype_atm_descriptions[0], 0 }, { &ifm_subtype_atm_aliases[0], 1 }, { NULL, 0 }, }, { { &ifm_shared_option_descriptions[0], 0 }, { &ifm_shared_option_aliases[0], 1 }, { &ifm_subtype_atm_option_descriptions[0], 0 }, { NULL, 0 }, }, { { NULL, 0 }, }, }, }; static int get_media_subtype(int type, const char *val) { struct ifmedia_description *desc; struct ifmedia_type_to_subtype *ttos; int rval, i; /* Find the top-level interface type. */ for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes; desc->ifmt_string != NULL; desc++, ttos++) if (type == desc->ifmt_word) break; if (desc->ifmt_string == NULL) errx(1, "unknown media type 0x%x", type); for (i = 0; ttos->subtypes[i].desc != NULL; i++) { rval = lookup_media_word(ttos->subtypes[i].desc, val); if (rval != -1) return (rval); } errx(1, "unknown media subtype: %s", val); /*NOTREACHED*/ } static int get_media_mode(int type, const char *val) { struct ifmedia_description *desc; struct ifmedia_type_to_subtype *ttos; int rval, i; /* Find the top-level interface type. */ for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes; desc->ifmt_string != NULL; desc++, ttos++) if (type == desc->ifmt_word) break; if (desc->ifmt_string == NULL) errx(1, "unknown media mode 0x%x", type); for (i = 0; ttos->modes[i].desc != NULL; i++) { rval = lookup_media_word(ttos->modes[i].desc, val); if (rval != -1) return (rval); } return -1; } static int get_media_options(int type, const char *val) { struct ifmedia_description *desc; struct ifmedia_type_to_subtype *ttos; char *optlist, *optptr; int option = 0, i, rval = 0; /* We muck with the string, so copy it. */ optlist = strdup(val); if (optlist == NULL) err(1, "strdup"); /* Find the top-level interface type. */ for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes; desc->ifmt_string != NULL; desc++, ttos++) if (type == desc->ifmt_word) break; if (desc->ifmt_string == NULL) errx(1, "unknown media type 0x%x", type); /* * Look up the options in the user-provided comma-separated * list. 
*/ optptr = optlist; for (; (optptr = strtok(optptr, ",")) != NULL; optptr = NULL) { for (i = 0; ttos->options[i].desc != NULL; i++) { option = lookup_media_word(ttos->options[i].desc, optptr); if (option != -1) break; } if (option == 0) errx(1, "unknown option: %s", optptr); rval |= option; } free(optlist); return (rval); } static int lookup_media_word(struct ifmedia_description *desc, const char *val) { for (; desc->ifmt_string != NULL; desc++) if (strcasecmp(desc->ifmt_string, val) == 0) return (desc->ifmt_word); return (-1); } static struct ifmedia_description *get_toptype_desc(int ifmw) { struct ifmedia_description *desc; for (desc = ifm_type_descriptions; desc->ifmt_string != NULL; desc++) if (IFM_TYPE(ifmw) == desc->ifmt_word) break; return desc; } static struct ifmedia_type_to_subtype *get_toptype_ttos(int ifmw) { struct ifmedia_description *desc; struct ifmedia_type_to_subtype *ttos; for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes; desc->ifmt_string != NULL; desc++, ttos++) if (IFM_TYPE(ifmw) == desc->ifmt_word) break; return ttos; } static struct ifmedia_description *get_subtype_desc(int ifmw, struct ifmedia_type_to_subtype *ttos) { int i; struct ifmedia_description *desc; for (i = 0; ttos->subtypes[i].desc != NULL; i++) { if (ttos->subtypes[i].alias) continue; for (desc = ttos->subtypes[i].desc; desc->ifmt_string != NULL; desc++) { if (IFM_SUBTYPE(ifmw) == desc->ifmt_word) return desc; } } return NULL; } static struct ifmedia_description *get_mode_desc(int ifmw, struct ifmedia_type_to_subtype *ttos) { int i; struct ifmedia_description *desc; for (i = 0; ttos->modes[i].desc != NULL; i++) { if (ttos->modes[i].alias) continue; for (desc = ttos->modes[i].desc; desc->ifmt_string != NULL; desc++) { if (IFM_MODE(ifmw) == desc->ifmt_word) return desc; } } return NULL; } static void print_media_word(int ifmw, int print_toptype) { struct ifmedia_description *desc; struct ifmedia_type_to_subtype *ttos; int seen_option = 0, i; /* Find the top-level interface type. */ desc = get_toptype_desc(ifmw); ttos = get_toptype_ttos(ifmw); if (desc->ifmt_string == NULL) { printf(""); return; } else if (print_toptype) { printf("%s", desc->ifmt_string); } /* * Don't print the top-level type; it's not like we can * change it, or anything. */ /* Find subtype. */ desc = get_subtype_desc(ifmw, ttos); if (desc == NULL) { printf(""); return; } if (print_toptype) putchar(' '); printf("%s", desc->ifmt_string); if (print_toptype) { desc = get_mode_desc(ifmw, ttos); if (desc != NULL && strcasecmp("autoselect", desc->ifmt_string)) printf(" mode %s", desc->ifmt_string); } /* Find options. */ for (i = 0; ttos->options[i].desc != NULL; i++) { if (ttos->options[i].alias) continue; for (desc = ttos->options[i].desc; desc->ifmt_string != NULL; desc++) { if (ifmw & desc->ifmt_word) { if (seen_option == 0) printf(" <"); printf("%s%s", seen_option++ ? "," : "", desc->ifmt_string); } } } printf("%s", seen_option ? ">" : ""); if (print_toptype && IFM_INST(ifmw) != 0) printf(" instance %d", IFM_INST(ifmw)); } static void print_media_word_ifconfig(int ifmw) { struct ifmedia_description *desc; struct ifmedia_type_to_subtype *ttos; int seen_option = 0, i; /* Find the top-level interface type. */ desc = get_toptype_desc(ifmw); ttos = get_toptype_ttos(ifmw); if (desc->ifmt_string == NULL) { printf(""); return; } /* * Don't print the top-level type; it's not like we can * change it, or anything. */ /* Find subtype. 
*/ desc = get_subtype_desc(ifmw, ttos); if (desc == NULL) { printf(""); return; } printf("media %s", desc->ifmt_string); desc = get_mode_desc(ifmw, ttos); if (desc != NULL) printf(" mode %s", desc->ifmt_string); /* Find options. */ for (i = 0; ttos->options[i].desc != NULL; i++) { if (ttos->options[i].alias) continue; for (desc = ttos->options[i].desc; desc->ifmt_string != NULL; desc++) { if (ifmw & desc->ifmt_word) { if (seen_option == 0) printf(" mediaopt "); printf("%s%s", seen_option++ ? "," : "", desc->ifmt_string); } } } if (IFM_INST(ifmw) != 0) printf(" instance %d", IFM_INST(ifmw)); } /********************************************************************** * ...until here. **********************************************************************/ static struct cmd media_cmds[] = { DEF_CMD_ARG("media", setmedia), DEF_CMD_ARG("mode", setmediamode), DEF_CMD_ARG("mediaopt", setmediaopt), DEF_CMD_ARG("-mediaopt",unsetmediaopt), DEF_CMD_ARG("inst", setmediainst), DEF_CMD_ARG("instance", setmediainst), }; static struct afswtch af_media = { .af_name = "af_media", .af_af = AF_UNSPEC, .af_other_status = media_status, }; static __constructor void ifmedia_ctor(void) { size_t i; for (i = 0; i < nitems(media_cmds); i++) cmd_register(&media_cmds[i]); af_register(&af_media); } Index: projects/clang900-import/share/man/man5/src.conf.5 =================================================================== --- projects/clang900-import/share/man/man5/src.conf.5 (revision 352536) +++ projects/clang900-import/share/man/man5/src.conf.5 (revision 352537) @@ -1,2009 +1,2020 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd August 16, 2019 +.Dd September 17, 2019 .Dt SRC.CONF 5 .Os .Sh NAME .Nm src.conf .Nd "source build options" .Sh DESCRIPTION The .Nm file contains settings that will apply to every build involving the .Fx source tree; see .Xr build 7 . .Pp The .Nm file uses the standard makefile syntax. However, .Nm should not specify any dependencies to .Xr make 1 . Instead, .Nm is to set .Xr make 1 variables that control the aspects of how the system builds. .Pp The default location of .Nm is .Pa /etc/src.conf , though an alternative location can be specified in the .Xr make 1 variable .Va SRCCONF . Overriding the location of .Nm may be necessary if the system-wide settings are not suitable for a particular build. For instance, setting .Va SRCCONF to .Pa /dev/null effectively resets all build controls to their defaults. .Pp The only purpose of .Nm is to control the compilation of the .Fx source code, which is usually located in .Pa /usr/src . As a rule, the system administrator creates .Nm when the values of certain control variables need to be changed from their defaults. .Pp In addition, control variables can be specified for a particular build via the .Fl D option of .Xr make 1 or in its environment; see .Xr environ 7 . .Pp The environment of .Xr make 1 for the build can be controlled via the .Va SRC_ENV_CONF variable, which defaults to .Pa /etc/src-env.conf . Some examples that may only be set in this file are .Va WITH_DIRDEPS_BUILD , and .Va WITH_META_MODE , and .Va MAKEOBJDIRPREFIX as they are environment-only variables. .Pp The values of variables are ignored regardless of their setting; even if they would be set to .Dq Li FALSE or .Dq Li NO . The presence of an option causes it to be honored by .Xr make 1 . .Pp This list provides a name and short description for variables that can be used for source builds. 
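.Pp
As a brief sketch only (the particular options chosen here are arbitrary examples, not recommendations), a
.Nm
that disables building of games and of the
.Xr lpr 1
suite would contain:
.Bd -literal -offset indent
WITHOUT_GAMES=yes
WITHOUT_LPR=yes
.Ed
.Pp
Since only the presence of each variable matters, the same effect can be had
for a single build by passing
.Dq Li "-DWITHOUT_GAMES -DWITHOUT_LPR"
on the
.Xr make 1
command line.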
.Bl -tag -width indent .It Va WITHOUT_ACCT Set to not build process accounting tools such as .Xr accton 8 and .Xr sa 8 . .It Va WITHOUT_ACPI Set to not build .Xr acpiconf 8 , .Xr acpidump 8 and related programs. .It Va WITHOUT_AMD Set to not build .Xr amd 8 , and related programs. .It Va WITHOUT_APM Set to not build .Xr apm 8 , .Xr apmd 8 and related programs. .It Va WITHOUT_ASSERT_DEBUG Set to compile programs and libraries without the .Xr assert 3 checks. .It Va WITHOUT_AT Set to not build .Xr at 1 and related utilities. .It Va WITHOUT_ATM Set to not build programs and libraries related to ATM networking. .It Va WITHOUT_AUDIT Set to not build audit support into system programs. .It Va WITHOUT_AUTHPF Set to not build .Xr authpf 8 . .It Va WITHOUT_AUTOFS Set to not build .Xr autofs 5 related programs, libraries, and kernel modules. .It Va WITHOUT_AUTO_OBJ Disable automatic creation of objdirs. This is enabled by default if the wanted OBJDIR is writable by the current user. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_BEARSSL Build the BearSSL library. .Pp BearSSL is a tiny SSL library suitable for embedded environments. For details see .Lk http://www.BearSSL.org/ .Pp This library is currently only used to perform signature verification and related operations for Verified Exec and .Xr loader 8 . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_LOADER_EFI_SECUREBOOT (unless .Va WITHOUT_LOADER_EFI_SECUREBOOT is set explicitly) .It Va WITH_LOADER_VERIEXEC (unless .Va WITHOUT_LOADER_VERIEXEC is set explicitly) .It Va WITH_VERIEXEC (unless .Va WITHOUT_VERIEXEC is set explicitly) .El .It Va WITHOUT_BHYVE Set to not build or install .Xr bhyve 8 , associated utilities, and examples. .Pp This option only affects amd64/amd64. .It Va WITH_BIND_NOW Build all binaries with the .Dv DF_BIND_NOW flag set to indicate that the run-time loader should perform all relocation processing at process startup rather than on demand. .It Va WITHOUT_BINUTILS Set to not build or install GNU .Xr as 1 , .Xr objdump 1 , and for some CPU architectures .Xr ld.bfd 1 as part of the normal system build. The resulting system cannot build programs from source. .Pp This is a default setting on arm64/aarch64 and riscv/riscv64. .It Va WITH_BINUTILS Set to build and install GNU .Xr as 1 , .Xr objdump 1 , and for some CPU architectures .Xr ld.bfd 1 as part of the normal system build. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_BINUTILS_BOOTSTRAP Set to not build binutils (as, ld, and objdump) as part of the bootstrap process. .Bf -symbolic The option does not work for build targets unless some alternative toolchain is provided. .Ef .Pp This is a default setting on arm64/aarch64 and riscv/riscv64. .It Va WITH_BINUTILS_BOOTSTRAP Set build binutils (as, ld, and objdump) as part of the bootstrap process. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. 
.It Va WITHOUT_BLACKLIST Set this if you do not want to build .Xr blacklistd 8 and .Xr blacklistctl 8 . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_BLACKLIST_SUPPORT (unless .Va WITH_BLACKLIST_SUPPORT is set explicitly) .El .It Va WITHOUT_BLACKLIST_SUPPORT Set to build some programs without .Xr libblacklist 3 support, like .Xr fingerd 8 , .Xr ftpd 8 , and .Xr sshd 8 . .It Va WITHOUT_BLUETOOTH Set to not build Bluetooth related kernel modules, programs and libraries. .It Va WITHOUT_BOOT Set to not build the boot blocks and loader. .It Va WITHOUT_BOOTPARAMD Set to not build or install .Xr bootparamd 8 . .It Va WITHOUT_BOOTPD Set to not build or install .Xr bootpd 8 . .It Va WITHOUT_BSDINSTALL Set to not build .Xr bsdinstall 8 , .Xr sade 8 , and related programs. .It Va WITHOUT_BSD_CPIO Set to not build the BSD licensed version of cpio based on .Xr libarchive 3 . .It Va WITHOUT_BSD_CRTBEGIN Disable the BSD licensed .Pa crtbegin.o and .Pa crtend.o . .Pp This is a default setting on sparc64/sparc64. .It Va WITH_BSD_CRTBEGIN Enable the BSD licensed .Pa crtbegin.o and .Pa crtend.o . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and riscv/riscv64. .It Va WITH_BSD_GREP Install BSD-licensed grep as '[ef]grep' instead of GNU grep. .It Va WITHOUT_BSNMP Set to not build or install .Xr bsnmpd 1 and related libraries and data files. .It Va WITHOUT_BZIP2 Set to not build contributed bzip2 software as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_BZIP2_SUPPORT (unless .Va WITH_BZIP2_SUPPORT is set explicitly) .El .It Va WITHOUT_BZIP2_SUPPORT Set to build some programs without optional bzip2 support. .It Va WITHOUT_CALENDAR Set to not build .Xr calendar 1 . .It Va WITHOUT_CAPSICUM Set to not build Capsicum support into system programs. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CASPER .El .It Va WITHOUT_CASPER Set to not build Casper program and related libraries. .It Va WITH_CCACHE_BUILD Set to use .Xr ccache 1 for the build. No configuration is required except to install the .Sy devel/ccache package. When using with .Xr distcc 1 , set .Sy CCACHE_PREFIX=/usr/local/bin/distcc . The default cache directory of .Pa $HOME/.ccache will be used, which can be overridden by setting .Sy CCACHE_DIR . The .Sy CCACHE_COMPILERCHECK option defaults to .Sy content when using the in-tree bootstrap compiler, and .Sy mtime when using an external compiler. The .Sy CCACHE_CPP2 option is used for Clang but not GCC. .Pp Sharing a cache between multiple work directories requires using a layout similar to .Pa /some/prefix/src .Pa /some/prefix/obj and an environment such as: .Bd -literal -offset indent CCACHE_BASEDIR='${SRCTOP:H}' MAKEOBJDIRPREFIX='${SRCTOP:H}/obj' .Ed .Pp See .Xr ccache 1 for more configuration options. .It Va WITHOUT_CCD Set to not build .Xr geom_ccd 4 and related utilities. .It Va WITHOUT_CDDL Set to not build code licensed under Sun's CDDL. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CTF .It .Va WITHOUT_LOADER_ZFS .It .Va WITHOUT_ZFS .El .It Va WITHOUT_CLANG Set to not build the Clang C/C++ compiler during the regular phase of the build. 
.Pp This is a default setting on riscv/riscv64 and sparc64/sparc64. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_LLVM_COV .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_LLVM_TARGET_AARCH64 (unless .Va WITH_LLVM_TARGET_AARCH64 is set explicitly) .It Va WITHOUT_LLVM_TARGET_ALL (unless .Va WITH_LLVM_TARGET_ALL is set explicitly) .It Va WITHOUT_LLVM_TARGET_ARM (unless .Va WITH_LLVM_TARGET_ARM is set explicitly) .It Va WITHOUT_LLVM_TARGET_MIPS (unless .Va WITH_LLVM_TARGET_MIPS is set explicitly) .It Va WITHOUT_LLVM_TARGET_POWERPC (unless .Va WITH_LLVM_TARGET_POWERPC is set explicitly) .It Va WITHOUT_LLVM_TARGET_SPARC (unless .Va WITH_LLVM_TARGET_SPARC is set explicitly) .It Va WITHOUT_LLVM_TARGET_X86 (unless .Va WITH_LLVM_TARGET_X86 is set explicitly) .El .It Va WITH_CLANG Set to build the Clang C/C++ compiler during the normal phase of the build. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_CLANG_BOOTSTRAP Set to not build the Clang C/C++ compiler during the bootstrap phase of the build. To be able to build the system, either gcc or clang bootstrap must be enabled unless an alternate compiler is provided via XCC. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_CLANG_BOOTSTRAP Set to build the Clang C/C++ compiler during the bootstrap phase of the build. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITH_CLANG_EXTRAS Set to build additional clang and llvm tools, such as bugpoint and clang-format. .It Va WITHOUT_CLANG_FULL Set to avoid building the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp This is a default setting on riscv/riscv64 and sparc64/sparc64. .It Va WITH_CLANG_FULL Set to build the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_CLANG_IS_CC Set to install the GCC compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_CLANG_IS_CC Set to install the Clang C/C++ compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITHOUT_CPP Set to not build .Xr cpp 1 . .It Va WITHOUT_CROSS_COMPILER Set to not build any cross compiler in the cross-tools stage of buildworld. 
When compiling a different version of .Fx than what is installed on the system, provide an alternate compiler with XCC to ensure success. When compiling with an identical version of .Fx to the host, this option may be safely used. This option may also be safe when the host version of .Fx is close to the sources being built, but all bets are off if there have been any changes to the toolchain between the versions. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS_BOOTSTRAP .It .Va WITHOUT_CLANG_BOOTSTRAP .It .Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP .It .Va WITHOUT_GCC_BOOTSTRAP .It .Va WITHOUT_LLD_BOOTSTRAP .El .It Va WITHOUT_CRYPT Set to not build any crypto code. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_OPENSSH .It .Va WITHOUT_OPENSSL .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITH_CTF Set to compile with CTF (Compact C Type Format) data. CTF data encapsulates a reduced form of debugging information similar to DWARF and the venerable stabs and is required for DTrace. .It Va WITHOUT_CUSE Set to not build CUSE-related programs and libraries. .It Va WITHOUT_CXGBETOOL Set to not build .Xr cxgbetool 8 .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpcspe and riscv/riscv64. .It Va WITH_CXGBETOOL Set to build .Xr cxgbetool 8 .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_CXX Set to not build .Xr c++ 1 and related libraries. It will also prevent building of .Xr gperf 1 and .Xr devd 8 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_DTRACE_TESTS .It .Va WITHOUT_GNUCXX .It .Va WITHOUT_LLVM_COV .It .Va WITHOUT_TESTS .El .It Va WITHOUT_DEBUG_FILES Set to avoid building or installing standalone debug files for each executable binary and shared library. .It Va WITHOUT_DIALOG Set to not build .Xr dialog 1 , .Xr dialog 3 , .Xr dpv 1 , and .Xr dpv 3 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BSDINSTALL .El .It Va WITHOUT_DICT Set to not build the Webster dictionary files. .It Va WITH_DIRDEPS_BUILD This is an experimental build system. For details see http://www.crufty.net/sjg/docs/freebsd-meta-mode.htm. Build commands can be seen from the top-level with: .Dl make show-valid-targets The build is driven by dirdeps.mk using .Va DIRDEPS stored in Makefile.depend files found in each directory. .Pp The build can be started from anywhere, and behaves the same. The initial instance of .Xr make 1 recursively reads .Va DIRDEPS from .Pa Makefile.depend , computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS skips checking dirdep dependencies and will only build in the current and child directories. .Va NO_DIRDEPS_BELOW skips building any dirdeps and only build the current directory. .Pp This also utilizes the .Va WITH_META_MODE logic for incremental builds. .Pp The build hides commands executed unless .Va NO_SILENT is defined. .Pp Note that there is currently no mass install feature for this. 
.Pp When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITH_INSTALL_AS_USER .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_META_MODE (unless .Va WITHOUT_META_MODE is set explicitly) .It Va WITH_STAGING (unless .Va WITHOUT_STAGING is set explicitly) .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .It Va WITH_SYSROOT (unless .Va WITHOUT_SYSROOT is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_DIRDEPS_CACHE Cache result of dirdeps.mk which can save significant time for subsequent builds. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_DMAGENT Set to not build dma Mail Transport Agent. .It Va WITHOUT_DOCCOMPRESS Set to not install compressed system documentation. Only the uncompressed version will be installed. .It Va WITH_DTRACE_TESTS Set to build and install the DTrace test suite in .Pa /usr/tests/cddl/usr.sbin/dtrace . This test suite is considered experimental on architectures other than amd64/amd64 and running it may cause system instability. .It Va WITHOUT_DYNAMICROOT Set this if you do not want to link .Pa /bin and .Pa /sbin dynamically. .It Va WITHOUT_EE Set to not build and install .Xr edit 1 , .Xr ee 1 , and related programs. .It Va WITHOUT_EFI Set not to build .Xr efivar 3 and .Xr efivar 8 . .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_EFI Set to build .Xr efivar 3 and .Xr efivar 8 . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP Set to not build ELF Tool Chain tools (addr2line, nm, size, strings and strip) as part of the bootstrap process. .Bf -symbolic An alternate bootstrap tool chain must be provided. .Ef .It Va WITHOUT_EXAMPLES Set to avoid installing examples to .Pa /usr/share/examples/ . .It Va WITH_EXPERIMENTAL Set to include experimental features in the build. .It Va WITH_EXTRA_TCP_STACKS Set to build extra TCP stack modules. .It Va WITHOUT_FDT Set to not build Flattened Device Tree support as part of the base system. This includes the device tree compiler (dtc) and libfdt support library. .It Va WITHOUT_FILE Set to not build .Xr file 1 and related programs. .It Va WITHOUT_FINGER Set to not build or install .Xr finger 1 and .Xr fingerd 8 . .It Va WITHOUT_FLOPPY Set to not build or install programs for operating floppy disk driver. .It Va WITHOUT_FMTREE Set to not build and install .Pa /usr/sbin/fmtree . .It Va WITHOUT_FORMAT_EXTENSIONS Set to not enable .Fl fformat-extensions when compiling the kernel. Also disables all format checking. .It Va WITHOUT_FORTH Set to build bootloaders without Forth support. .It Va WITHOUT_FP_LIBC Set to build .Nm libc without floating-point support. .It Va WITHOUT_FREEBSD_UPDATE Set to not build .Xr freebsd-update 8 . .It Va WITHOUT_FTP Set to not build or install .Xr ftp 1 and .Xr ftpd 8 . .It Va WITHOUT_GAMES Set to not build games. .It Va WITHOUT_GCC Set to not build and install gcc and g++ as part of the normal build process. 
.Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386 and riscv/riscv64. .It Va WITH_GCC Set to build and install gcc and g++. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_GCC_BOOTSTRAP Set to not build gcc and g++ as part of the bootstrap process. You must enable either gcc or clang bootstrap to be able to build the system, unless an alternative compiler is provided via XCC. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386 and riscv/riscv64. .It Va WITH_GCC_BOOTSTRAP Set to build gcc and g++ as part of the bootstrap process. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_GCOV Set to not build the .Xr gcov 1 tool. .It Va WITHOUT_GDB Set to not build .Xr gdb 1 . .Pp This is a default setting on arm64/aarch64 and riscv/riscv64. .It Va WITH_GDB Set to build .Xr gdb 1 . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_GDB_LIBEXEC Set to install .Xr gdb 1 into .Pa /usr/bin . .Pp This is a default setting on sparc64/sparc64. .It Va WITH_GDB_LIBEXEC Set to install .Xr gdb 1 into .Pa /usr/libexec . This permits .Xr gdb 1 to be used as a fallback for .Xr crashinfo 8 if a newer version is not installed. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and riscv/riscv64. .It Va WITHOUT_GNUCXX Do not build the GNU C++ stack (g++, libstdc++). This is the default on platforms where clang is the system compiler. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITH_GNUCXX Build the GNU C++ stack (g++, libstdc++). This is the default on platforms where gcc is the system compiler. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITHOUT_GNU_DIFF Set to not build GNU .Xr diff 1 and .Xr diff3 1 . .It Va WITHOUT_GNU_GREP Set to not build GNU .Xr grep 1 . .It Va WITH_GNU_GREP_COMPAT Set this option to include GNU extensions in .Xr bsdgrep 1 by linking against libgnuregex. .It Va WITHOUT_GOOGLETEST Set to neither build nor install .Lb libgmock , .Lb libgtest , and dependent tests. +.Pp +This is a default setting on +mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf and mips/mips64hf. +.It Va WITH_GOOGLETEST +Set to build and install +.Lb libgmock , +.Lb libgtest , +and dependent tests. 
+.Pp +This is a default setting on +amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITHOUT_GPIO Set to not build .Xr gpioctl 8 as part of the base system. .It Va WITHOUT_GPL_DTC Set to build the BSD licensed version of the device tree compiler rather than the GPLed one from elinux.org. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITH_GPL_DTC Set to build the GPL'd version of the device tree compiler from elinux.org, instead of the BSD licensed one. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITHOUT_GSSAPI Set to not build libgssapi. .It Va WITHOUT_HAST Set to not build .Xr hastd 8 and related utilities. .It Va WITH_HESIOD Set to build Hesiod support. .It Va WITHOUT_HTML Set to not build HTML docs. .It Va WITHOUT_HYPERV Set to not build or install HyperV utilities. .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, arm64/aarch64, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_HYPERV Set to build or install HyperV utilities. .Pp This is a default setting on amd64/amd64 and i386/i386. .It Va WITHOUT_ICONV Set to not build iconv as part of libc. .It Va WITHOUT_INCLUDES Set to not install header files. This option used to be spelled .Va NO_INCS . .Bf -symbolic The option does not work for build targets. .Ef .It Va WITHOUT_INET Set to not build programs and libraries related to IPv4 networking. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_INET_SUPPORT .El .It Va WITHOUT_INET6 Set to not build programs and libraries related to IPv6 networking. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_INET6_SUPPORT .El .It Va WITHOUT_INET6_SUPPORT Set to build libraries, programs, and kernel modules without IPv6 support. .It Va WITHOUT_INETD Set to not build .Xr inetd 8 . .It Va WITHOUT_INET_SUPPORT Set to build libraries, programs, and kernel modules without IPv4 support. .It Va WITHOUT_INSTALLLIB Set this to not install optional libraries. For example, when creating a .Xr nanobsd 8 image. .Bf -symbolic The option does not work for build targets. .Ef .It Va WITH_INSTALL_AS_USER Set to make install targets succeed for non-root users by installing files with owner and group attributes set to that of the user running the .Xr make 1 command. The user still must set the .Va DESTDIR variable to point to a directory where the user has write permissions. .It Va WITHOUT_IPFILTER Set to not build IP Filter package. .It Va WITHOUT_IPFW Set to not build IPFW tools. .It Va WITHOUT_IPSEC_SUPPORT Set to not build the kernel with .Xr ipsec 4 support. This option is needed for .Xr ipsec 4 and .Xr tcpmd5 4 . .It Va WITHOUT_ISCSI Set to not build .Xr iscsid 8 and related utilities. .It Va WITHOUT_JAIL Set to not build tools for the support of jails; e.g., .Xr jail 8 . .It Va WITHOUT_KDUMP Set to not build .Xr kdump 1 and .Xr truss 1 . .It Va WITHOUT_KERBEROS Set this to not build Kerberos 5 (KTH Heimdal). 
When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .It Va WITHOUT_KERBEROS_SUPPORT (unless .Va WITH_KERBEROS_SUPPORT is set explicitly) .El .It Va WITHOUT_KERBEROS_SUPPORT Set to build some programs without Kerberos support, like .Xr ssh 1 , .Xr telnet 1 , .Xr sshd 8 , and .Xr telnetd 8 . .It Va WITH_KERNEL_RETPOLINE Set to enable the "retpoline" mitigation for CVE-2017-5715 in the kernel build. .It Va WITHOUT_KERNEL_SYMBOLS Set to not install kernel symbol files. .Bf -symbolic This option is recommended for those people who have small root partitions. .Ef .It Va WITHOUT_KVM Set to not build the .Nm libkvm library as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_KVM_SUPPORT (unless .Va WITH_KVM_SUPPORT is set explicitly) .El .It Va WITHOUT_KVM_SUPPORT Set to build some programs without optional .Nm libkvm support. .It Va WITHOUT_LDNS Setting this variable will prevent the LDNS library from being built. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_LDNS_UTILS .It .Va WITHOUT_UNBOUND .El .It Va WITHOUT_LDNS_UTILS Setting this variable will prevent building the LDNS utilities .Xr drill 1 and .Xr host 1 . .It Va WITHOUT_LEGACY_CONSOLE Set to not build programs that support a legacy PC console; e.g., .Xr kbdcontrol 1 and .Xr vidcontrol 1 . .It Va WITHOUT_LIB32 On 64-bit platforms, set to not build 32-bit library set and a .Nm ld-elf32.so.1 runtime linker. .It Va WITHOUT_LIBCPLUSPLUS Set to avoid building libcxxrt and libc++. .It Va WITHOUT_LIBPTHREAD Set to not build the .Nm libpthread providing library, .Nm libthr . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_LIBTHR .El .It Va WITH_LIBSOFT On armv6 only, set to enable soft float ABI compatibility libraries. This option is for transitioning to the new hard float ABI. .It Va WITHOUT_LIBTHR Set to not build the .Nm libthr (1:1 threading) library. .It Va WITHOUT_LLD Set to not build LLVM's lld linker. .Pp This is a default setting on riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLD Set to build LLVM's lld linker. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLDB Set to not build the LLDB debugger. .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLDB Set to build the LLDB debugger. .Pp This is a default setting on amd64/amd64, arm64/aarch64 and i386/i386. .It Va WITHOUT_LLD_BOOTSTRAP Set to not build the LLD linker during the bootstrap phase of the build. To be able to build the system, either Binutils or LLD bootstrap must be enabled unless an alternate linker is provided via XLD. .Pp This is a default setting on arm/arm, arm/armv6, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. 
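.Pp
As an illustrative sketch only (the linker path below is hypothetical and
depends on what is installed on the host), an alternate linker can be named
via
.Va XLD
when both bootstrap linkers are disabled:
.Bd -literal -offset indent
make -DWITHOUT_LLD_BOOTSTRAP -DWITHOUT_BINUTILS_BOOTSTRAP XLD=/usr/local/bin/ld.lld buildworld
.Ed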
.It Va WITH_LLD_BOOTSTRAP Set to build the LLD linker during the bootstrap phase of the build, and use it during buildworld and buildkernel. .Pp This is a default setting on amd64/amd64, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITHOUT_LLD_IS_LD Set to use GNU binutils ld as the system linker, instead of LLVM's LLD. .Pp This is a default setting on arm/arm, arm/armv6, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLD_IS_LD Set to use LLVM's LLD as the system linker, instead of GNU binutils ld. .Pp This is a default setting on amd64/amd64, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITHOUT_LLVM_COV Set to not build the .Xr llvm-cov 1 tool. .Pp This is a default setting on riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_COV Set to build the .Xr llvm-cov 1 tool. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_LIBUNWIND Set to use GCC's stack unwinder (instead of LLVM's libunwind). .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITH_LLVM_LIBUNWIND Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf and riscv/riscv64. .It Va WITHOUT_LLVM_TARGET_AARCH64 Set to not build LLVM target support for AArch64. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_AARCH64 Set to build LLVM target support for AArch64. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_ALL Set to only build the required LLVM target support. This option is preferred to specific target support options. .Pp This is a default setting on riscv/riscv64 and sparc64/sparc64. When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_LLVM_TARGET_AARCH64 (unless .Va WITH_LLVM_TARGET_AARCH64 is set explicitly) .It Va WITHOUT_LLVM_TARGET_ARM (unless .Va WITH_LLVM_TARGET_ARM is set explicitly) .It Va WITHOUT_LLVM_TARGET_MIPS (unless .Va WITH_LLVM_TARGET_MIPS is set explicitly) .It Va WITHOUT_LLVM_TARGET_POWERPC (unless .Va WITH_LLVM_TARGET_POWERPC is set explicitly) .It Va WITHOUT_LLVM_TARGET_SPARC (unless .Va WITH_LLVM_TARGET_SPARC is set explicitly) .El .It Va WITH_LLVM_TARGET_ALL Set to build support for all LLVM targets. This option is always applied to the bootstrap compiler for buildworld when LLVM is used. 
.Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_ARM Set to not build LLVM target support for ARM. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_ARM Set to build LLVM target support for ARM. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITH_LLVM_TARGET_BPF Set to build LLVM target support for BPF. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_MIPS Set to not build LLVM target support for MIPS. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_MIPS Set to build LLVM target support for MIPS. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_POWERPC Set to not build LLVM target support for PowerPC. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_POWERPC Set to build LLVM target support for PowerPC. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITH_LLVM_TARGET_RISCV Set to build LLVM target support for RISC-V. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_SPARC Set to not build LLVM target support for SPARC. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_SPARC Set to build LLVM target support for SPARC. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_X86 Set to not build LLVM target support for X86. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. 
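.Pp
As a sketch (the additional target kept here is only an example), a
.Nm
that trims LLVM to the required target plus one extra cross target might
contain:
.Bd -literal -offset indent
WITHOUT_LLVM_TARGET_ALL=yes
WITH_LLVM_TARGET_AARCH64=yes
.Ed
.Pp
Per the description of
.Va WITHOUT_LLVM_TARGET_ALL
above, the required target support is still built, and a target set
explicitly with its
.Va WITH_LLVM_TARGET_*
option remains enabled.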
.It Va WITH_LLVM_TARGET_X86 Set to build LLVM target support for X86. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .Pp This is a default setting on amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITH_LOADER_EFI_SECUREBOOT Enable building .Xr loader 8 with support for verification based on certificates obtained from UEFI. .Pp .It Va WITH_LOADER_FIREWIRE Enable firewire support in /boot/loader on x86. This option is a nop on all other platforms. .It Va WITH_LOADER_FORCE_LE Set to force the powerpc boot loader to launch the kernel in little endian mode. .It Va WITHOUT_LOADER_GELI Disable inclusion of GELI crypto support in the boot chain binaries. .Pp This is a default setting on powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITH_LOADER_GELI Set to build GELI bootloader support. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf and riscv/riscv64. .It Va WITHOUT_LOADER_LUA Set to not build LUA bindings for the boot loader. .Pp This is a default setting on powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITH_LOADER_LUA Set to build LUA bindings for the boot loader. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf and riscv/riscv64. .It Va WITHOUT_LOADER_OFW Disable building of openfirmware bootloader components. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf and riscv/riscv64. .It Va WITH_LOADER_OFW Set to build openfirmware bootloader components. .Pp This is a default setting on powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_LOADER_UBOOT Disable building of ubldr. .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LOADER_UBOOT Set to build ubldr. .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITH_LOADER_VERBOSE Set to build with extra verbose debugging in the loader. May explode already nearly too large loader over the limit. Use with care. .It Va WITH_LOADER_VERIEXEC Enable building .Xr loader 8 with support for verifcation similar to Verified Exec. .Pp It depends on .Va WITH_BEARSSL When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_LOADER_EFI_SECUREBOOT (unless .Va WITHOUT_LOADER_EFI_SECUREBOOT is set explicitly) .El .It Va WITHOUT_LOADER_ZFS Set to not build ZFS file system boot loader support. .It Va WITHOUT_LOCALES Set to not build localization files; see .Xr locale 1 . .It Va WITHOUT_LOCATE Set to not build .Xr locate 1 and related programs. .It Va WITHOUT_LPR Set to not build .Xr lpr 1 and related programs. 
.It Va WITHOUT_LS_COLORS Set to build .Xr ls 1 without support for colors to distinguish file types. .It Va WITHOUT_LZMA_SUPPORT Set to build some programs without optional lzma compression support. .It Va WITHOUT_MAIL Set to not build any mail support (MUA or MTA). When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DMAGENT .It .Va WITHOUT_MAILWRAPPER .It .Va WITHOUT_SENDMAIL .El .It Va WITHOUT_MAILWRAPPER Set to not build the .Xr mailwrapper 8 MTA selector. .It Va WITHOUT_MAKE Set to not install .Xr make 1 and related support files. .It Va WITHOUT_MAKE_CHECK_USE_SANDBOX Set to not execute .Dq Li "make check" in limited sandbox mode. This option should be paired with .Va WITH_INSTALL_AS_USER if executed as an unprivileged user. See .Xr tests 7 for more details. .It Va WITHOUT_MAN Set to not build manual pages. When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_MAN_UTILS (unless .Va WITH_MAN_UTILS is set explicitly) .El .It Va WITHOUT_MANCOMPRESS Set to not to install compressed man pages. Only the uncompressed versions will be installed. .It Va WITHOUT_MAN_UTILS Set to not build utilities for manual pages, .Xr apropos 1 , .Xr makewhatis 1 , .Xr man 1 , .Xr whatis 1 , .Xr manctl 8 , and related support files. .It Va WITH_META_MODE Create .Xr make 1 meta files when building, which can provide a reliable incremental build when using .Xr filemon 4 . The meta file is created in OBJDIR as .Pa target.meta . These meta files track the command that was executed, its output, and the current directory. The .Xr filemon 4 module is required unless .Va NO_FILEMON is defined. When the module is loaded, any files used by the commands executed are tracked as dependencies for the target in its meta file. The target is considered out-of-date and rebuilt if any of these conditions are true compared to the last build: .Bl -bullet -compact .It The command to execute changes. .It The current working directory changes. .It The target's meta file is missing. .It The target's meta file is missing filemon data when filemon is loaded and a previous run did not have it loaded. .It [requires .Xr filemon 4 ] Files read, executed or linked to are newer than the target. .It [requires .Xr filemon 4 ] Files read, written, executed or linked are missing. .El The meta files can also be useful for debugging. .Pp The build hides commands that are executed unless .Va NO_SILENT is defined. Errors cause .Xr make 1 to show some of its environment for further debugging. .Pp The build operates as it normally would otherwise. This option originally invoked a different build system but that was renamed to .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_MLX5TOOL Set to not build .Xr mlx5tool 8 .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpcspe and riscv/riscv64. .It Va WITH_MLX5TOOL Set to build .Xr mlx5tool 8 .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_NDIS Set to not build programs and libraries related to NDIS emulation support. .It Va WITHOUT_NETCAT Set to not build .Xr nc 1 utility. .It Va WITHOUT_NETGRAPH Set to not build applications to support .Xr netgraph 4 . 
When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_ATM .It .Va WITHOUT_BLUETOOTH .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_NETGRAPH_SUPPORT (unless .Va WITH_NETGRAPH_SUPPORT is set explicitly) .El .It Va WITHOUT_NETGRAPH_SUPPORT Set to build libraries, programs, and kernel modules without netgraph support. .It Va WITHOUT_NIS Set to not build .Xr NIS 8 support and related programs. If set, you might need to adopt your .Xr nsswitch.conf 5 and remove .Sq nis entries. .It Va WITHOUT_NLS Set to not build NLS catalogs. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_NLS_CATALOGS .El .It Va WITHOUT_NLS_CATALOGS Set to not build NLS catalog support for .Xr csh 1 . .It Va WITHOUT_NS_CACHING Set to disable name caching in the .Pa nsswitch subsystem. The generic caching daemon, .Xr nscd 8 , will not be built either if this option is set. .It Va WITHOUT_NTP Set to not build .Xr ntpd 8 and related programs. .It Va WITHOUT_NVME Set to not build nvme related tools and kernel modules. .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_NVME Set to build nvme related tools and kernel modules. .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386 and powerpc/powerpc64. .It Va WITH_OFED Set to build the .Dq "OpenFabrics Enterprise Distribution" Infiniband software stack. .It Va WITH_OFED_EXTRA Set to build the non-essential components of the .Dq "OpenFabrics Enterprise Distribution" Infiniband software stack, mostly examples. .It Va WITH_OPENLDAP Enable building openldap support for kerberos. .It Va WITHOUT_OPENMP Set to not build LLVM's OpenMP runtime. .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, arm64/aarch64, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_OPENMP Set to build LLVM's OpenMP runtime. .Pp This is a default setting on amd64/amd64 and i386/i386. .It Va WITHOUT_OPENSSH Set to not build OpenSSH. .It Va WITHOUT_OPENSSL Set to not build OpenSSL. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_OPENSSH .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_PAM Set to not build PAM library and modules. .Bf -symbolic This option is deprecated and does nothing. .Ef When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_PAM_SUPPORT (unless .Va WITH_PAM_SUPPORT is set explicitly) .El .It Va WITHOUT_PAM_SUPPORT Set to build some programs without PAM support, particularly .Xr ftpd 8 and .Xr ppp 8 . .It Va WITHOUT_PF Set to not build PF firewall package. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_AUTHPF .El .It Va WITH_PIE Build dynamically linked binaries as Position-Independent Executable (PIE). .It Va WITHOUT_PKGBOOTSTRAP Set to not build .Xr pkg 7 bootstrap tool. .It Va WITHOUT_PMC Set to not build .Xr pmccontrol 8 and related programs. .It Va WITHOUT_PORTSNAP Set to not build or install .Xr portsnap 8 and related files. 
When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_FREEBSD_UPDATE .El .It Va WITHOUT_PPP Set to not build .Xr ppp 8 and related programs. .It Va WITHOUT_PROFILE Set to not build profiled libraries for use with .Xr gprof 8 . .Pp This is a default setting on mips/mips64el, mips/mips64, mips/mips64elhf and mips/mips64hf. .It Va WITH_PROFILE Set to build profiled libraries for use with .Xr gprof 8 . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mipsn32, mips/mipselhf, mips/mipshf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITHOUT_QUOTAS Set to not build .Xr quota 1 and related programs. .It Va WITHOUT_RADIUS_SUPPORT Set to not build radius support into various applications, like .Xr pam_radius 8 and .Xr ppp 8 . .It Va WITH_RATELIMIT Set to build the system with rate limit support. .Pp This makes .Dv SO_MAX_PACING_RATE effective in .Xr getsockopt 2 , and .Ar txrlimit support in .Xr ifconfig 8 , by proxy. .It Va WITHOUT_RBOOTD Set to not build or install .Xr rbootd 8 . .It Va WITH_REPRODUCIBLE_BUILD Set to exclude build metadata (such as the build time, user, or host) from the kernel, boot loaders, and uname output, so that builds produce bit-for-bit identical output. .It Va WITHOUT_RESCUE Set to not build .Xr rescue 8 . .It Va WITH_RETPOLINE Set to build the base system with the retpoline speculative execution vulnerability mitigation for CVE-2017-5715. .It Va WITHOUT_ROUTED Set to not build .Xr routed 8 utility. .It Va WITH_RPCBIND_WARMSTART_SUPPORT Set to build .Xr rpcbind 8 with warmstart support. .It Va WITHOUT_SENDMAIL Set to not build .Xr sendmail 8 and related programs. .It Va WITHOUT_SERVICESDB Set to not install .Pa /var/db/services.db . .It Va WITHOUT_SETUID_LOGIN Set this to disable the installation of .Xr login 1 as a set-user-ID root program. .It Va WITHOUT_SHAREDOCS Set to not build the .Bx 4.4 legacy docs. .It Va WITH_SHARED_TOOLCHAIN Set to build the toolchain binaries shared. The set includes .Xr cc 1 , .Xr make 1 and necessary utilities like assembler, linker and library archive manager. .It Va WITH_SORT_THREADS Set to enable threads in .Xr sort 1 . .It Va WITHOUT_SOURCELESS Set to not build kernel modules that include sourceless code (either microcode or native code for host CPU). When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_SOURCELESS_HOST .It .Va WITHOUT_SOURCELESS_UCODE .El .It Va WITHOUT_SOURCELESS_HOST Set to not build kernel modules that include sourceless native code for host CPU. .It Va WITHOUT_SOURCELESS_UCODE Set to not build kernel modules that include sourceless microcode. .It Va WITHOUT_SSP Set to not build world with propolice stack smashing protection. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf and mips/mips64hf. .It Va WITH_SSP Set to build world with propolice stack smashing protection. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_STAGING Enable staging of files to a stage tree. This can be best thought of as auto-install to .Va DESTDIR with some extra meta data to ensure dependencies can be tracked. Depends on .Va WITH_DIRDEPS_BUILD . 
When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_STAGING_MAN Enable staging of man pages to stage tree. .It Va WITH_STAGING_PROG Enable staging of PROGs to stage tree. .It Va WITH_STALE_STAGED Check staged files are not stale. .It Va WITH_SVN Set to install .Xr svnlite 1 as .Xr svn 1 . .It Va WITHOUT_SVNLITE Set to not build .Xr svnlite 1 and related programs. .It Va WITHOUT_SYMVER Set to disable symbol versioning when building shared libraries. .It Va WITHOUT_SYSCONS Set to not build .Xr syscons 4 support files such as keyboard maps, fonts, and screen output maps. .It Va WITH_SYSROOT Enable use of sysroot during build. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_SYSTEM_COMPILER Set to not opportunistically skip building a cross-compiler during the bootstrap phase of the build. Normally, if the currently installed compiler matches the planned bootstrap compiler type and revision, then it will not be built. This does not prevent a compiler from being built for installation though, only for building one for the build itself. The .Va WITHOUT_CLANG and .Va WITHOUT_GCC options control those. .It Va WITHOUT_SYSTEM_LINKER Set to not opportunistically skip building a cross-linker during the bootstrap phase of the build. Normally, if the currently installed linker matches the planned bootstrap linker type and revision, then it will not be built. This does not prevent a linker from being built for installation though, only for building one for the build itself. The .Va WITHOUT_LLD and .Va WITHOUT_BINUTILS options control those. .Pp This option is only relevant when .Va WITH_LLD_BOOTSTRAP is set. .It Va WITHOUT_TALK Set to not build or install .Xr talk 1 and .Xr talkd 8 . .It Va WITHOUT_TCP_WRAPPERS Set to not build or install .Xr tcpd 8 , and related utilities. .It Va WITHOUT_TCSH Set to not build and install .Pa /bin/csh (which is .Xr tcsh 1 ) . .It Va WITHOUT_TELNET Set to not build .Xr telnet 1 and related programs. .It Va WITHOUT_TESTS Set to not build nor install the .Fx Test Suite in .Pa /usr/tests/ . See .Xr tests 7 for more details. This also disables the build of all test-related dependencies, including ATF. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DTRACE_TESTS .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GOOGLETEST (unless .Va WITH_GOOGLETEST is set explicitly) .It Va WITHOUT_TESTS_SUPPORT (unless .Va WITH_TESTS_SUPPORT is set explicitly) .El .It Va WITHOUT_TESTS_SUPPORT Set to disables the build of all test-related dependencies, including ATF. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_GOOGLETEST .El .It Va WITHOUT_TEXTPROC Set to not build programs used for text processing. .It Va WITHOUT_TFTP Set to not build or install .Xr tftp 1 and .Xr tftpd 8 . .It Va WITHOUT_TOOLCHAIN Set to not install header or programs used for program development, compilers, debuggers etc. 
When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_GCC .It .Va WITHOUT_GDB .It .Va WITHOUT_INCLUDES .It .Va WITHOUT_LLD .It .Va WITHOUT_LLDB .It .Va WITHOUT_LLVM_COV .El .It Va WITHOUT_UNBOUND Set to not build .Xr unbound 8 and related programs. .It Va WITHOUT_UNIFIED_OBJDIR Set to use the historical object directory format for .Xr build 7 targets. For native-builds and builds done directly in sub-directories the format of .Pa ${MAKEOBJDIRPREFIX}/${.CURDIR} is used, while for cross-builds .Pa ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH}/${.CURDIR} is used. .Pp This option is transitional and will be removed before the 12.0 release, at which time .va WITH_UNIFIED_OBJDIR will be enabled permanently. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_USB Set to not build USB-related programs and libraries. .It Va WITHOUT_USB_GADGET_EXAMPLES Set to not build USB gadget kernel modules. .It Va WITHOUT_UTMPX Set to not build user accounting tools such as .Xr last 1 , .Xr users 1 , .Xr who 1 , .Xr ac 8 , .Xr lastlogin 8 and .Xr utx 8 . .It Va WITH_VERIEXEC Enable building .Xr veriexec 8 which loads the contents of verified manifests into the kernel for use by .Xr mac_veriexec 4 .Pp It depends on .Va WITH_BEARSSL .It Va WITHOUT_VI Set to not build and install vi, view, ex and related programs. .It Va WITHOUT_VT Set to not build .Xr vt 4 support files (fonts and keymaps). .It Va WITHOUT_WARNS Set this to not add warning flags to the compiler invocations. Useful as a temporary workaround when code enters the tree which triggers warnings in environments that differ from the original developer. .It Va WITHOUT_WIRELESS Set to not build programs used for 802.11 wireless networks; especially .Xr wpa_supplicant 8 and .Xr hostapd 8 . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_WIRELESS_SUPPORT (unless .Va WITH_WIRELESS_SUPPORT is set explicitly) .El .It Va WITHOUT_WIRELESS_SUPPORT Set to build libraries, programs, and kernel modules without 802.11 wireless support. .It Va WITHOUT_WPA_SUPPLICANT_EAPOL Build .Xr wpa_supplicant 8 without support for the IEEE 802.1X protocol and without support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS protocols (usable only via 802.1X). .It Va WITHOUT_ZFS Set to not build ZFS file system kernel module, libraries, and user commands. .It Va WITHOUT_ZONEINFO Set to not build the timezone database. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_ZONEINFO_LEAPSECONDS_SUPPORT .It .Va WITHOUT_ZONEINFO_OLD_TIMEZONES_SUPPORT .El .It Va WITH_ZONEINFO_LEAPSECONDS_SUPPORT Set to build leapsecond information in to the timezone database. .It Va WITH_ZONEINFO_OLD_TIMEZONES_SUPPORT Set to build backward compatibility timezone aliases in to the timezone database. .El .Sh FILES .Bl -tag -compact -width Pa .It Pa /etc/src.conf .It Pa /etc/src-env.conf .It Pa /usr/share/mk/bsd.own.mk .El .Sh SEE ALSO .Xr make 1 , .Xr make.conf 5 , .Xr build 7 , .Xr ports 7 .Sh HISTORY The .Nm file appeared in .Fx 7.0 . .Sh AUTHORS This manual page was autogenerated by .An tools/build/options/makeman . 
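(Editorial illustration; not part of src.conf.5.) The knobs documented in the list above are ordinary make(1) variables, and only whether a WITH_*/WITHOUT_* variable is defined matters, not its value. A minimal /etc/src.conf exercising a few of the options described above might look like the sketch below; the particular selection is hypothetical.

        # /etc/src.conf -- hypothetical example only
        WITHOUT_TESTS=yes               # also enforces WITHOUT_DTRACE_TESTS (see above)
        WITHOUT_PROFILE=yes             # skip profiled libraries for gprof(8)
        WITHOUT_SENDMAIL=yes
        WITH_REPRODUCIBLE_BUILD=yes     # omit build time/user/host metadata

The effective MK_* settings that result can usually be inspected with "make -C /usr/src showconfig".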
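(Editorial illustration; not part of src.conf.5.) WITH_META_MODE, as its entry above notes, must be set in the environment, on the make command line, or in /etc/src-env.conf rather than /etc/src.conf, and it relies on the filemon(4) module unless NO_FILEMON is defined. A sketch:

        # /etc/src-env.conf -- hypothetical example only
        WITH_META_MODE=yes

        # filemon(4) is needed for full dependency tracking; it is normally
        # loaded ahead of the build (as root):
        #   kldload filemon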
Index: projects/clang900-import/share/mk/src.libnames.mk =================================================================== --- projects/clang900-import/share/mk/src.libnames.mk (revision 352536) +++ projects/clang900-import/share/mk/src.libnames.mk (revision 352537) @@ -1,655 +1,655 @@ # $FreeBSD$ # # The include file define library names suitable # for INTERNALLIB and PRIVATELIB definition .if !target(____) .error src.libnames.mk cannot be included directly. .endif .if !target(____) ____: .include _PRIVATELIBS= \ atf_c \ atf_cxx \ bsdstat \ devdctl \ event \ gmock \ gtest \ gmock_main \ gtest_main \ heimipcc \ heimipcs \ ldns \ sqlite3 \ ssh \ ucl \ unbound \ zstd _INTERNALLIBS= \ amu \ bsnmptools \ c_nossp_pic \ cron \ elftc \ fifolog \ ifconfig \ ipf \ lpr \ netbsd \ ntp \ ntpevent \ openbsd \ opts \ parse \ pe \ pmcstat \ sl \ sm \ smdb \ smutil \ telnet \ vers _LIBRARIES= \ ${_PRIVATELIBS} \ ${_INTERNALLIBS} \ ${LOCAL_LIBRARIES} \ 80211 \ alias \ archive \ asn1 \ auditd \ avl \ be \ begemot \ bluetooth \ bsdxml \ bsm \ bsnmp \ bz2 \ c \ c_pic \ calendar \ cam \ casper \ cap_dns \ cap_fileargs \ cap_grp \ cap_pwd \ cap_sysctl \ cap_syslog \ com_err \ compiler_rt \ crypt \ crypto \ ctf \ cuse \ cxxrt \ devctl \ devdctl \ devinfo \ devstat \ dialog \ dl \ dpv \ dtrace \ dwarf \ edit \ efivar \ elf \ execinfo \ fetch \ figpar \ geom \ gnuregex \ gpio \ gssapi \ gssapi_krb5 \ hdb \ heimbase \ heimntlm \ heimsqlite \ hx509 \ ipsec \ ipt \ jail \ kadm5clnt \ kadm5srv \ kafs5 \ kdc \ kiconv \ krb5 \ kvm \ l \ lzma \ m \ magic \ md \ memstat \ mp \ mt \ ncurses \ ncursesw \ netgraph \ ngatm \ nv \ nvpair \ opencsd \ opie \ pam \ panel \ panelw \ pcap \ pcsclite \ pjdlog \ pmc \ proc \ procstat \ pthread \ radius \ regex \ roken \ rpcsec_gss \ rpcsvc \ rt \ rtld_db \ sbuf \ sdp \ sm \ smb \ ssl \ ssp_nonshared \ stdthreads \ supcplusplus \ sysdecode \ tacplus \ termcap \ termcapw \ ufs \ ugidfw \ ulog \ umem \ usb \ usbhid \ util \ uutil \ vmmapi \ wind \ wrap \ xo \ y \ ypclnt \ z \ zfs_core \ zfs \ zpool \ .if ${MK_BLACKLIST} != "no" _LIBRARIES+= \ blacklist \ .endif .if ${MK_OFED} != "no" _LIBRARIES+= \ cxgb4 \ ibcm \ ibmad \ ibnetdisc \ ibumad \ ibverbs \ mlx4 \ mlx5 \ rdmacm \ osmcomp \ opensm \ osmvendor .endif .if ${MK_BEARSSL} == "yes" _INTERNALLIBS+= \ bearssl \ secureboot \ LIBBEARSSL?= ${LIBBEARSSLDIR}/libbearssl${PIE_SUFFIX}.a LIBSECUREBOOT?= ${LIBSECUREBOOTDIR}/libsecureboot${PIE_SUFFIX}.a .endif .if ${MK_VERIEXEC} == "yes" _INTERNALLIBS+= veriexec LIBVERIEXEC?= ${LIBVERIEXECDIR}/libveriexec${PIE_SUFFIX}.a .endif # Each library's LIBADD needs to be duplicated here for static linkage of # 2nd+ order consumers. Auto-generating this would be better. 
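(Editorial sketch; not part of src.libnames.mk.) The comment above means that each _DP_<lib> list that follows is expected to repeat the LIBADD of that library's own Makefile, so that static (NO_SHARED) and internal consumers still pick up transitive dependencies. For the libarchive change in this revision the pairing is roughly as below; the Makefile excerpt is assumed rather than quoted, and the conditional additions (crypto or md) are omitted:

        # lib/libarchive/Makefile (assumed excerpt)
        LIBADD=         z bz2 lzma bsdxml zstd

        # share/mk/src.libnames.mk (matching entry added below)
        _DP_archive=    z bz2 lzma bsdxml zstd

A sanity check near the end of this file errors out when the two lists drift apart.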
_DP_80211= sbuf bsdxml -_DP_archive= z bz2 lzma bsdxml +_DP_archive= z bz2 lzma bsdxml zstd _DP_zstd= pthread .if ${MK_BLACKLIST} != "no" _DP_blacklist+= pthread .endif _DP_crypto= pthread .if ${MK_OPENSSL} != "no" _DP_archive+= crypto .else _DP_archive+= md .endif _DP_sqlite3= pthread _DP_ssl= crypto _DP_ssh= crypto crypt z .if ${MK_LDNS} != "no" _DP_ssh+= ldns .endif _DP_edit= ncursesw .if ${MK_OPENSSL} != "no" _DP_bsnmp= crypto .endif _DP_geom= bsdxml sbuf _DP_cam= sbuf _DP_kvm= elf _DP_casper= nv _DP_cap_dns= nv _DP_cap_fileargs= nv _DP_cap_grp= nv _DP_cap_pwd= nv _DP_cap_sysctl= nv _DP_cap_syslog= nv .if ${MK_OFED} != "no" _DP_pcap= ibverbs mlx5 .endif _DP_pjdlog= util _DP_opie= md _DP_usb= pthread _DP_unbound= ssl crypto pthread _DP_rt= pthread .if ${MK_OPENSSL} == "no" _DP_radius= md .else _DP_radius= crypto .endif _DP_rtld_db= elf procstat _DP_procstat= kvm util elf .if ${MK_CXX} == "yes" .if ${MK_LIBCPLUSPLUS} != "no" _DP_proc= cxxrt .else _DP_proc= supcplusplus .endif .endif .if ${MK_CDDL} != "no" _DP_proc+= ctf .endif _DP_proc+= elf procstat rtld_db util _DP_mp= crypto _DP_memstat= kvm _DP_magic= z _DP_mt= sbuf bsdxml _DP_ldns= ssl crypto .if ${MK_OPENSSL} != "no" _DP_fetch= ssl crypto .else _DP_fetch= md .endif _DP_execinfo= elf _DP_dwarf= elf _DP_dpv= dialog figpar util ncursesw _DP_dialog= ncursesw m _DP_cuse= pthread _DP_atf_cxx= atf_c _DP_gtest= pthread _DP_gmock= gtest _DP_gmock_main= gmock _DP_gtest_main= gtest _DP_devstat= kvm _DP_pam= radius tacplus opie md util .if ${MK_KERBEROS} != "no" _DP_pam+= krb5 .endif .if ${MK_OPENSSH} != "no" _DP_pam+= ssh .endif .if ${MK_NIS} != "no" _DP_pam+= ypclnt .endif _DP_roken= crypt _DP_kadm5clnt= com_err krb5 roken _DP_kadm5srv= com_err hdb krb5 roken _DP_heimntlm= crypto com_err krb5 roken _DP_hx509= asn1 com_err crypto roken wind _DP_hdb= asn1 com_err krb5 roken sqlite3 _DP_asn1= com_err roken _DP_kdc= roken hdb hx509 krb5 heimntlm asn1 crypto _DP_wind= com_err roken _DP_heimbase= pthread _DP_heimipcc= heimbase roken pthread _DP_heimipcs= heimbase roken pthread _DP_kafs5= asn1 krb5 roken _DP_krb5+= asn1 com_err crypt crypto hx509 roken wind heimbase heimipcc _DP_gssapi_krb5+= gssapi krb5 crypto roken asn1 com_err _DP_lzma= pthread _DP_ucl= m _DP_vmmapi= util _DP_opencsd= cxxrt _DP_ctf= z _DP_dtrace= ctf elf proc pthread rtld_db _DP_xo= util # The libc dependencies are not strictly needed but are defined to make the # assert happy. 
_DP_c= compiler_rt .if ${MK_SSP} != "no" _DP_c+= ssp_nonshared .endif _DP_stdthreads= pthread _DP_tacplus= md _DP_panel= ncurses _DP_panelw= ncursesw _DP_rpcsec_gss= gssapi _DP_smb= kiconv _DP_ulog= md _DP_fifolog= z _DP_ipf= kvm _DP_zfs= md pthread umem util uutil m nvpair avl bsdxml geom nvpair z \ zfs_core _DP_zfs_core= nvpair _DP_zpool= md pthread z nvpair avl umem _DP_be= zfs nvpair # OFED support .if ${MK_OFED} != "no" _DP_cxgb4= ibverbs pthread _DP_ibcm= ibverbs _DP_ibmad= ibumad _DP_ibnetdisc= osmcomp ibmad ibumad _DP_ibumad= _DP_ibverbs= _DP_mlx4= ibverbs pthread _DP_mlx5= ibverbs pthread _DP_rdmacm= ibverbs _DP_osmcomp= pthread _DP_opensm= pthread _DP_osmvendor= ibumad pthread .endif # Define special cases LDADD_supcplusplus= -lsupc++ LIBATF_C= ${LIBDESTDIR}${LIBDIR_BASE}/libprivateatf-c.a LIBATF_CXX= ${LIBDESTDIR}${LIBDIR_BASE}/libprivateatf-c++.a LDADD_atf_c= -lprivateatf-c LDADD_atf_cxx= -lprivateatf-c++ LIBGMOCK= ${LIBDESTDIR}${LIBDIR_BASE}/libprivategmock.a LIBGMOCK_MAIN= ${LIBDESTDIR}${LIBDIR_BASE}/libprivategmock_main.a LIBGTEST= ${LIBDESTDIR}${LIBDIR_BASE}/libprivategtest.a LIBGTEST_MAIN= ${LIBDESTDIR}${LIBDIR_BASE}/libprivategtest_main.a LDADD_gmock= -lprivategmock LDADD_gtest= -lprivategtest LDADD_gmock_main= -lprivategmock_main LDADD_gtest_main= -lprivategtest_main .for _l in ${_PRIVATELIBS} LIB${_l:tu}?= ${LIBDESTDIR}${LIBDIR_BASE}/libprivate${_l}.a .endfor .if ${MK_PIE} != "no" PIE_SUFFIX= _pie .endif .for _l in ${_LIBRARIES} .if ${_INTERNALLIBS:M${_l}} || !defined(SYSROOT) LDADD_${_l}_L+= -L${LIB${_l:tu}DIR} .endif DPADD_${_l}?= ${LIB${_l:tu}} .if ${_PRIVATELIBS:M${_l}} LDADD_${_l}?= -lprivate${_l} .elif ${_INTERNALLIBS:M${_l}} LDADD_${_l}?= ${LDADD_${_l}_L} -l${_l:S/${PIE_SUFFIX}//}${PIE_SUFFIX} .else LDADD_${_l}?= ${LDADD_${_l}_L} -l${_l} .endif # Add in all dependencies for static linkage. .if defined(_DP_${_l}) && (${_INTERNALLIBS:M${_l}} || \ (defined(NO_SHARED) && ${NO_SHARED:tl} != "no")) .for _d in ${_DP_${_l}} DPADD_${_l}+= ${DPADD_${_d}} LDADD_${_l}+= ${LDADD_${_d}} .endfor .endif .endfor # These are special cases where the library is broken and anything that uses # it needs to add more dependencies. Broken usually means that it has a # cyclic dependency and cannot link its own dependencies. This is bad, please # fix the library instead. # Unless the library itself is broken then the proper place to define # dependencies is _DP_* above. # libatf-c++ exposes libatf-c abi hence we need to explicit link to atf_c for # atf_cxx DPADD_atf_cxx+= ${DPADD_atf_c} LDADD_atf_cxx+= ${LDADD_atf_c} DPADD_gmock+= ${DPADD_gtest} LDADD_gmock+= ${LDADD_gtest} DPADD_gmock_main+= ${DPADD_gmock} LDADD_gmock_main+= ${LDADD_gmock} DPADD_gtest_main+= ${DPADD_gtest} LDADD_gtest_main+= ${LDADD_gtest} # Detect LDADD/DPADD that should be LIBADD, before modifying LDADD here. _BADLDADD= .for _l in ${LDADD:M-l*:N-l*/*:C,^-l,,} .if ${_LIBRARIES:M${_l}} && !${_PRIVATELIBS:M${_l}} _BADLDADD+= ${_l} .endif .endfor .if !empty(_BADLDADD) .error ${.CURDIR}: These libraries should be LIBADD+=foo rather than DPADD/LDADD+=-lfoo: ${_BADLDADD} .endif .for _l in ${LIBADD} DPADD+= ${DPADD_${_l}} LDADD+= ${LDADD_${_l}} .endfor # INTERNALLIB definitions. 
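(Editorial sketch; not part of src.libnames.mk.) Putting the loop above to work, a consumer Makefile names only its direct dependencies in LIBADD; DPADD/LDADD are then filled in automatically, and for internal libraries or NO_SHARED links the _DP_* entries are appended so that second-order dependencies are linked as well. Approximately, for a hypothetical program:

        # usr.bin/foo/Makefile (hypothetical consumer)
        PROG=   foo
        LIBADD= archive

        # Expansion performed by the machinery above (approximate):
        #   DPADD+= ${LIBARCHIVE}
        #   LDADD+= -larchive
        # plus, when linking statically (NO_SHARED), the _DP_archive list
        # (z bz2 lzma bsdxml zstd, and crypto or md) as well.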
LIBELFTCDIR= ${OBJTOP}/lib/libelftc LIBELFTC?= ${LIBELFTCDIR}/libelftc${PIE_SUFFIX}.a LIBPEDIR= ${OBJTOP}/lib/libpe LIBPE?= ${LIBPEDIR}/libpe${PIE_SUFFIX}.a LIBOPENBSDDIR= ${OBJTOP}/lib/libopenbsd LIBOPENBSD?= ${LIBOPENBSDDIR}/libopenbsd${PIE_SUFFIX}.a LIBSMDIR= ${OBJTOP}/lib/libsm LIBSM?= ${LIBSMDIR}/libsm${PIE_SUFFIX}.a LIBSMDBDIR= ${OBJTOP}/lib/libsmdb LIBSMDB?= ${LIBSMDBDIR}/libsmdb${PIE_SUFFIX}.a LIBSMUTILDIR= ${OBJTOP}/lib/libsmutil LIBSMUTIL?= ${LIBSMUTILDIR}/libsmutil${PIE_SUFFIX}.a LIBNETBSDDIR?= ${OBJTOP}/lib/libnetbsd LIBNETBSD?= ${LIBNETBSDDIR}/libnetbsd${PIE_SUFFIX}.a LIBVERSDIR?= ${OBJTOP}/kerberos5/lib/libvers LIBVERS?= ${LIBVERSDIR}/libvers${PIE_SUFFIX}.a LIBSLDIR= ${OBJTOP}/kerberos5/lib/libsl LIBSL?= ${LIBSLDIR}/libsl${PIE_SUFFIX}.a LIBIFCONFIGDIR= ${OBJTOP}/lib/libifconfig LIBIFCONFIG?= ${LIBIFCONFIGDIR}/libifconfig${PIE_SUFFIX}.a LIBIPFDIR= ${OBJTOP}/sbin/ipf/libipf LIBIPF?= ${LIBIPFDIR}/libipf${PIE_SUFFIX}.a LIBTELNETDIR= ${OBJTOP}/lib/libtelnet LIBTELNET?= ${LIBTELNETDIR}/libtelnet${PIE_SUFFIX}.a LIBCRONDIR= ${OBJTOP}/usr.sbin/cron/lib LIBCRON?= ${LIBCRONDIR}/libcron${PIE_SUFFIX}.a LIBNTPDIR= ${OBJTOP}/usr.sbin/ntp/libntp LIBNTP?= ${LIBNTPDIR}/libntp${PIE_SUFFIX}.a LIBNTPEVENTDIR= ${OBJTOP}/usr.sbin/ntp/libntpevent LIBNTPEVENT?= ${LIBNTPEVENTDIR}/libntpevent${PIE_SUFFIX}.a LIBOPTSDIR= ${OBJTOP}/usr.sbin/ntp/libopts LIBOPTS?= ${LIBOPTSDIR}/libopts${PIE_SUFFIX}.a LIBPARSEDIR= ${OBJTOP}/usr.sbin/ntp/libparse LIBPARSE?= ${LIBPARSEDIR}/libparse${PIE_SUFFIX}.a LIBLPRDIR= ${OBJTOP}/usr.sbin/lpr/common_source LIBLPR?= ${LIBLPRDIR}/liblpr${PIE_SUFFIX}.a LIBFIFOLOGDIR= ${OBJTOP}/usr.sbin/fifolog/lib LIBFIFOLOG?= ${LIBFIFOLOGDIR}/libfifolog${PIE_SUFFIX}.a LIBBSNMPTOOLSDIR= ${OBJTOP}/usr.sbin/bsnmpd/tools/libbsnmptools LIBBSNMPTOOLS?= ${LIBBSNMPTOOLSDIR}/libbsnmptools${PIE_SUFFIX}.a LIBAMUDIR= ${OBJTOP}/usr.sbin/amd/libamu LIBAMU?= ${LIBAMUDIR}/libamu${PIE_SUFFIX}.a LIBBE?= ${LIBBEDIR}/libbe${PIE_SUFFIX}.a LIBPMCSTATDIR= ${OBJTOP}/lib/libpmcstat LIBPMCSTAT?= ${LIBPMCSTATDIR}/libpmcstat${PIE_SUFFIX}.a LIBC_NOSSP_PICDIR= ${OBJTOP}/lib/libc LIBC_NOSSP_PIC?= ${LIBC_NOSSP_PICDIR}/libc_nossp_pic.a # Define a directory for each library. This is useful for adding -L in when # not using a --sysroot or for meta mode bootstrapping when there is no # Makefile.depend. These are sorted by directory. 
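(Editorial sketch; not part of src.libnames.mk.) The internal libraries above all follow the same two-line pattern, which the sanity check at the end of this file insists on: a LIB<NAME>DIR pointing into ${OBJTOP} and a LIB<NAME> naming the static archive, with the PIE suffix as in the existing entries. In the abstract, for a hypothetical libfoo:

        # Shape of an _INTERNALLIBS entry (libfoo is hypothetical)
        LIBFOODIR=      ${OBJTOP}/lib/libfoo
        LIBFOO?=        ${LIBFOODIR}/libfoo${PIE_SUFFIX}.a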
LIBAVLDIR= ${OBJTOP}/cddl/lib/libavl LIBCTFDIR= ${OBJTOP}/cddl/lib/libctf LIBDTRACEDIR= ${OBJTOP}/cddl/lib/libdtrace LIBNVPAIRDIR= ${OBJTOP}/cddl/lib/libnvpair LIBUMEMDIR= ${OBJTOP}/cddl/lib/libumem LIBUUTILDIR= ${OBJTOP}/cddl/lib/libuutil LIBZFSDIR= ${OBJTOP}/cddl/lib/libzfs LIBZFS_COREDIR= ${OBJTOP}/cddl/lib/libzfs_core LIBZPOOLDIR= ${OBJTOP}/cddl/lib/libzpool # OFED support LIBCXGB4DIR= ${OBJTOP}/lib/ofed/libcxgb4 LIBIBCMDIR= ${OBJTOP}/lib/ofed/libibcm LIBIBMADDIR= ${OBJTOP}/lib/ofed/libibmad LIBIBNETDISCDIR=${OBJTOP}/lib/ofed/libibnetdisc LIBIBUMADDIR= ${OBJTOP}/lib/ofed/libibumad LIBIBVERBSDIR= ${OBJTOP}/lib/ofed/libibverbs LIBMLX4DIR= ${OBJTOP}/lib/ofed/libmlx4 LIBMLX5DIR= ${OBJTOP}/lib/ofed/libmlx5 LIBRDMACMDIR= ${OBJTOP}/lib/ofed/librdmacm LIBOSMCOMPDIR= ${OBJTOP}/lib/ofed/complib LIBOPENSMDIR= ${OBJTOP}/lib/ofed/libopensm LIBOSMVENDORDIR=${OBJTOP}/lib/ofed/libvendor LIBDIALOGDIR= ${OBJTOP}/gnu/lib/libdialog LIBGCOVDIR= ${OBJTOP}/gnu/lib/libgcov LIBGOMPDIR= ${OBJTOP}/gnu/lib/libgomp LIBGNUREGEXDIR= ${OBJTOP}/gnu/lib/libregex LIBSSPDIR= ${OBJTOP}/gnu/lib/libssp LIBSSP_NONSHAREDDIR= ${OBJTOP}/gnu/lib/libssp/libssp_nonshared LIBSUPCPLUSPLUSDIR= ${OBJTOP}/gnu/lib/libsupc++ LIBASN1DIR= ${OBJTOP}/kerberos5/lib/libasn1 LIBGSSAPI_KRB5DIR= ${OBJTOP}/kerberos5/lib/libgssapi_krb5 LIBGSSAPI_NTLMDIR= ${OBJTOP}/kerberos5/lib/libgssapi_ntlm LIBGSSAPI_SPNEGODIR= ${OBJTOP}/kerberos5/lib/libgssapi_spnego LIBHDBDIR= ${OBJTOP}/kerberos5/lib/libhdb LIBHEIMBASEDIR= ${OBJTOP}/kerberos5/lib/libheimbase LIBHEIMIPCCDIR= ${OBJTOP}/kerberos5/lib/libheimipcc LIBHEIMIPCSDIR= ${OBJTOP}/kerberos5/lib/libheimipcs LIBHEIMNTLMDIR= ${OBJTOP}/kerberos5/lib/libheimntlm LIBHX509DIR= ${OBJTOP}/kerberos5/lib/libhx509 LIBKADM5CLNTDIR= ${OBJTOP}/kerberos5/lib/libkadm5clnt LIBKADM5SRVDIR= ${OBJTOP}/kerberos5/lib/libkadm5srv LIBKAFS5DIR= ${OBJTOP}/kerberos5/lib/libkafs5 LIBKDCDIR= ${OBJTOP}/kerberos5/lib/libkdc LIBKRB5DIR= ${OBJTOP}/kerberos5/lib/libkrb5 LIBROKENDIR= ${OBJTOP}/kerberos5/lib/libroken LIBWINDDIR= ${OBJTOP}/kerberos5/lib/libwind LIBATF_CDIR= ${OBJTOP}/lib/atf/libatf-c LIBATF_CXXDIR= ${OBJTOP}/lib/atf/libatf-c++ LIBGMOCKDIR= ${OBJTOP}/lib/googletest/gmock LIBGMOCK_MAINDIR= ${OBJTOP}/lib/googletest/gmock_main LIBGTESTDIR= ${OBJTOP}/lib/googletest/gtest LIBGTEST_MAINDIR= ${OBJTOP}/lib/googletest/gtest_main LIBALIASDIR= ${OBJTOP}/lib/libalias/libalias LIBBLACKLISTDIR= ${OBJTOP}/lib/libblacklist LIBBLOCKSRUNTIMEDIR= ${OBJTOP}/lib/libblocksruntime LIBBSNMPDIR= ${OBJTOP}/lib/libbsnmp/libbsnmp LIBCASPERDIR= ${OBJTOP}/lib/libcasper/libcasper LIBCAP_DNSDIR= ${OBJTOP}/lib/libcasper/services/cap_dns LIBCAP_GRPDIR= ${OBJTOP}/lib/libcasper/services/cap_grp LIBCAP_PWDDIR= ${OBJTOP}/lib/libcasper/services/cap_pwd LIBCAP_SYSCTLDIR= ${OBJTOP}/lib/libcasper/services/cap_sysctl LIBCAP_SYSLOGDIR= ${OBJTOP}/lib/libcasper/services/cap_syslog LIBBSDXMLDIR= ${OBJTOP}/lib/libexpat LIBKVMDIR= ${OBJTOP}/lib/libkvm LIBPTHREADDIR= ${OBJTOP}/lib/libthr LIBMDIR= ${OBJTOP}/lib/msun LIBFORMDIR= ${OBJTOP}/lib/ncurses/form LIBFORMLIBWDIR= ${OBJTOP}/lib/ncurses/formw LIBMENUDIR= ${OBJTOP}/lib/ncurses/menu LIBMENULIBWDIR= ${OBJTOP}/lib/ncurses/menuw LIBNCURSESDIR= ${OBJTOP}/lib/ncurses/ncurses LIBNCURSESWDIR= ${OBJTOP}/lib/ncurses/ncursesw LIBPANELDIR= ${OBJTOP}/lib/ncurses/panel LIBPANELWDIR= ${OBJTOP}/lib/ncurses/panelw LIBCRYPTODIR= ${OBJTOP}/secure/lib/libcrypto LIBSSHDIR= ${OBJTOP}/secure/lib/libssh LIBSSLDIR= ${OBJTOP}/secure/lib/libssl LIBTEKENDIR= ${OBJTOP}/sys/teken/libteken LIBEGACYDIR= ${OBJTOP}/tools/build LIBLNDIR= 
${OBJTOP}/usr.bin/lex/lib LIBTERMCAPDIR= ${LIBNCURSESDIR} LIBTERMCAPWDIR= ${LIBNCURSESWDIR} # Default other library directories to lib/libNAME. .for lib in ${_LIBRARIES} LIB${lib:tu}DIR?= ${OBJTOP}/lib/lib${lib} .endfor # Validate that listed LIBADD are valid. .for _l in ${LIBADD} .if empty(_LIBRARIES:M${_l}) _BADLIBADD+= ${_l} .endif .endfor .if !empty(_BADLIBADD) .error ${.CURDIR}: Invalid LIBADD used which may need to be added to ${_this:T}: ${_BADLIBADD} .endif # Sanity check that libraries are defined here properly when building them. .if defined(LIB) && ${_LIBRARIES:M${LIB}} != "" .if !empty(LIBADD) && \ (!defined(_DP_${LIB}) || ${LIBADD:O:u} != ${_DP_${LIB}:O:u}) .error ${.CURDIR}: Missing or incorrect _DP_${LIB} entry in ${_this:T}. Should match LIBADD for ${LIB} ('${LIBADD}' vs '${_DP_${LIB}}') .endif # Note that OBJTOP is not yet defined here but for the purpose of the check # it is fine as it resolves to the SRC directory. .if !defined(LIB${LIB:tu}DIR) || !exists(${SRCTOP}/${LIB${LIB:tu}DIR:S,^${OBJTOP}/,,}) .error ${.CURDIR}: Missing or incorrect value for LIB${LIB:tu}DIR in ${_this:T}: ${LIB${LIB:tu}DIR:S,^${OBJTOP}/,,} .endif .if ${_INTERNALLIBS:M${LIB}} != "" && !defined(LIB${LIB:tu}) .error ${.CURDIR}: Missing value for LIB${LIB:tu} in ${_this:T}. Likely should be: LIB${LIB:tu}?= $${LIB${LIB:tu}DIR}/lib${LIB}.a .endif .endif .endif # !target(____) Index: projects/clang900-import/share/mk/src.opts.mk =================================================================== --- projects/clang900-import/share/mk/src.opts.mk (revision 352536) +++ projects/clang900-import/share/mk/src.opts.mk (revision 352537) @@ -1,588 +1,596 @@ # $FreeBSD$ # # Option file for FreeBSD /usr/src builds. # # Users define WITH_FOO and WITHOUT_FOO on the command line or in /etc/src.conf # and /etc/make.conf files. These translate in the build system to MK_FOO={yes,no} # with sensible (usually) defaults. # # Makefiles must include bsd.opts.mk after defining specific MK_FOO options that # are applicable for that Makefile (typically there are none, but sometimes there # are exceptions). Recursive makes usually add MK_FOO=no for options that they wish # to omit from that make. # # Makefiles must include bsd.mkopt.mk before they test the value of any MK_FOO # variable. # # Makefiles may also assume that this file is included by src.opts.mk should it # need variables defined there prior to the end of the Makefile where # bsd.{subdir,lib.bin}.mk is traditionally included. # # The old-style YES_FOO and NO_FOO are being phased out. No new instances of them # should be added. Old instances should be removed since they were just to # bridge the gap between FreeBSD 4 and FreeBSD 5. # # Makefiles should never test WITH_FOO or WITHOUT_FOO directly (although an # exception is made for _WITHOUT_SRCONF which turns off this mechanism # completely inside bsd.*.mk files). # .if !target(____) ____: .include # # Define MK_* variables (which are either "yes" or "no") for users # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the # make(1) environment. # These should be tested with `== "no"' or `!= "no"' in makefiles. # The NO_* variables should only be set by makefiles for variables # that haven't been converted over. # # These options are used by the src builds. Those listed in # __DEFAULT_YES_OPTIONS default to 'yes' and will build unless turned # off. __DEFAULT_NO_OPTIONS will default to 'no' and won't build # unless turned on. Any options listed in 'BROKEN_OPTIONS' will be # hard-wired to 'no'. 
"Broken" here means not working or # not-appropriate and/or not supported. It doesn't imply something is # wrong with the code. There's not a single good word for this, so # BROKEN was selected as the least imperfect one considered at the # time. Options are added to BROKEN_OPTIONS list on a per-arch basis. # At this time, there's no provision for mutually incompatible options. __DEFAULT_YES_OPTIONS = \ ACCT \ ACPI \ AMD \ APM \ AT \ ATM \ AUDIT \ AUTHPF \ AUTOFS \ BHYVE \ BINUTILS \ BINUTILS_BOOTSTRAP \ BLACKLIST \ BLUETOOTH \ BOOT \ BOOTPARAMD \ BOOTPD \ BSD_CPIO \ BSD_CRTBEGIN \ BSDINSTALL \ BSNMP \ BZIP2 \ CALENDAR \ CAPSICUM \ CASPER \ CCD \ CDDL \ CPP \ CROSS_COMPILER \ CRYPT \ CUSE \ CXX \ CXGBETOOL \ DIALOG \ DICT \ DMAGENT \ DYNAMICROOT \ EE \ EFI \ ELFTOOLCHAIN_BOOTSTRAP \ EXAMPLES \ FDT \ FILE \ FINGER \ FLOPPY \ FMTREE \ FORTH \ FP_LIBC \ FREEBSD_UPDATE \ FTP \ GAMES \ GCOV \ GDB \ GNU_DIFF \ GNU_GREP \ - GOOGLETEST \ GPIO \ HAST \ HTML \ HYPERV \ ICONV \ INET \ INET6 \ INETD \ IPFILTER \ IPFW \ ISCSI \ JAIL \ KDUMP \ KVM \ LDNS \ LDNS_UTILS \ LEGACY_CONSOLE \ LIB32 \ LIBPTHREAD \ LIBTHR \ LLVM_COV \ LOADER_GELI \ LOADER_LUA \ LOADER_OFW \ LOADER_UBOOT \ LOCALES \ LOCATE \ LPR \ LS_COLORS \ LZMA_SUPPORT \ MAIL \ MAILWRAPPER \ MAKE \ MLX5TOOL \ NDIS \ NETCAT \ NETGRAPH \ NLS_CATALOGS \ NS_CACHING \ NTP \ NVME \ OFED \ OPENSSL \ PAM \ PF \ PKGBOOTSTRAP \ PMC \ PORTSNAP \ PPP \ QUOTAS \ RADIUS_SUPPORT \ RBOOTD \ RESCUE \ ROUTED \ SENDMAIL \ SERVICESDB \ SETUID_LOGIN \ SHAREDOCS \ SOURCELESS \ SOURCELESS_HOST \ SOURCELESS_UCODE \ SVNLITE \ SYSCONS \ SYSTEM_COMPILER \ SYSTEM_LINKER \ TALK \ TCP_WRAPPERS \ TCSH \ TELNET \ TEXTPROC \ TFTP \ UNBOUND \ USB \ UTMPX \ VI \ VT \ WIRELESS \ WPA_SUPPLICANT_EAPOL \ ZFS \ LOADER_ZFS \ ZONEINFO __DEFAULT_NO_OPTIONS = \ BEARSSL \ BSD_GREP \ CLANG_EXTRAS \ DTRACE_TESTS \ EXPERIMENTAL \ GNU_GREP_COMPAT \ HESIOD \ LIBSOFT \ LOADER_FIREWIRE \ LOADER_FORCE_LE \ LOADER_VERBOSE \ LOADER_VERIEXEC_PASS_MANIFEST \ OFED_EXTRA \ OPENLDAP \ REPRODUCIBLE_BUILD \ RPCBIND_WARMSTART_SUPPORT \ SHARED_TOOLCHAIN \ SORT_THREADS \ SVN \ ZONEINFO_LEAPSECONDS_SUPPORT \ ZONEINFO_OLD_TIMEZONES_SUPPORT \ # LEFT/RIGHT. Left options which default to "yes" unless their corresponding # RIGHT option is disabled. __DEFAULT_DEPENDENT_OPTIONS= \ CLANG_FULL/CLANG \ LLVM_TARGET_ALL/CLANG \ LOADER_VERIEXEC/BEARSSL \ LOADER_EFI_SECUREBOOT/LOADER_VERIEXEC \ VERIEXEC/BEARSSL \ # MK_*_SUPPORT options which default to "yes" unless their corresponding # MK_* variable is set to "no". # .for var in \ BLACKLIST \ BZIP2 \ INET \ INET6 \ KERBEROS \ KVM \ NETGRAPH \ PAM \ TESTS \ WIRELESS __DEFAULT_DEPENDENT_OPTIONS+= ${var}_SUPPORT/${var} .endfor # # Default behaviour of some options depends on the architecture. Unfortunately # this means that we have to test TARGET_ARCH (the buildworld case) as well # as MACHINE_ARCH (the non-buildworld case). Normally TARGET_ARCH is not # used at all in bsd.*.mk, but we have to make an exception here if we want # to allow defaults for some things like clang to vary by target architecture. # Additional, per-target behavior should be rarely added only after much # gnashing of teeth and grinding of gears. # .if defined(TARGET_ARCH) __T=${TARGET_ARCH} .else __T=${MACHINE_ARCH} .endif .if defined(TARGET) __TT=${TARGET} .else __TT=${MACHINE} +.endif + +# Default GOOGLETEST to off for MIPS while LLVM PR 43263 is active. Part +# of the fusefs tests trigger excessively long compile times. 
It does +# eventually succeed, but this shouldn't be forced on those building by default. +.if ${__TT} == "mips" +__DEFAULT_NO_OPTIONS+= GOOGLETEST +.else +__DEFAULT_YES_OPTIONS+= GOOGLETEST .endif # All supported backends for LLVM_TARGET_XXX __LLVM_TARGETS= \ aarch64 \ arm \ mips \ powerpc \ sparc \ x86 __LLVM_TARGET_FILT= C/(amd64|i386)/x86/:S/sparc64/sparc/:S/arm64/aarch64/:S/powerpc64/powerpc/ .for __llt in ${__LLVM_TARGETS} # Default the given TARGET's LLVM_TARGET support to the value of MK_CLANG. .if ${__TT:${__LLVM_TARGET_FILT}} == ${__llt} __DEFAULT_DEPENDENT_OPTIONS+= LLVM_TARGET_${__llt:${__LLVM_TARGET_FILT}:tu}/CLANG # Disable other targets for arm and armv6, to work around "relocation truncated # to fit" errors with BFD ld, since libllvm.a will get too large to link. .elif ${__T} == "arm" || ${__T} == "armv6" __DEFAULT_NO_OPTIONS+=LLVM_TARGET_${__llt:tu} # aarch64 needs arm for -m32 support. .elif ${__TT} == "arm64" && ${__llt} == "arm" __DEFAULT_DEPENDENT_OPTIONS+= LLVM_TARGET_ARM/LLVM_TARGET_AARCH64 # Default the rest of the LLVM_TARGETs to the value of MK_LLVM_TARGET_ALL # which is based on MK_CLANG. .else __DEFAULT_DEPENDENT_OPTIONS+= LLVM_TARGET_${__llt:${__LLVM_TARGET_FILT}:tu}/LLVM_TARGET_ALL .endif .endfor __DEFAULT_NO_OPTIONS+=LLVM_TARGET_BPF __DEFAULT_NO_OPTIONS+=LLVM_TARGET_RISCV .include # If the compiler is not C++11 capable, disable Clang and use GCC instead. # This means that architectures that have GCC 4.2 as default can not # build Clang without using an external compiler. .if ${COMPILER_FEATURES:Mc++11} && (${__T} == "aarch64" || \ ${__T} == "amd64" || ${__TT} == "arm" || ${__T} == "i386") # Clang is enabled, and will be installed as the default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_IS_CC LLD __DEFAULT_NO_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX GPL_DTC .elif ${COMPILER_FEATURES:Mc++11} && ${__T:Mriscv*} == "" && ${__T} != "sparc64" # If an external compiler that supports C++11 is used as ${CC} and Clang # supports the target, then Clang is enabled but GCC is installed as the # default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG GCC GCC_BOOTSTRAP GNUCXX GPL_DTC LLD __DEFAULT_NO_OPTIONS+=CLANG_BOOTSTRAP CLANG_IS_CC .else # Everything else disables Clang, and uses GCC instead. __DEFAULT_YES_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX GPL_DTC __DEFAULT_NO_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_IS_CC LLD .endif # In-tree binutils/gcc are older versions without modern architecture support. .if ${__T} == "aarch64" || ${__T:Mriscv*} != "" BROKEN_OPTIONS+=BINUTILS BINUTILS_BOOTSTRAP GCC GCC_BOOTSTRAP GDB .endif .if ${__T:Mriscv*} != "" BROKEN_OPTIONS+=OFED .endif .if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "i386" || \ ${__T:Mriscv*} != "" || ${__TT} == "mips" __DEFAULT_YES_OPTIONS+=LLVM_LIBUNWIND .else __DEFAULT_NO_OPTIONS+=LLVM_LIBUNWIND .endif .if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "armv7" || \ ${__T} == "i386" __DEFAULT_YES_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD .else __DEFAULT_NO_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD .endif .if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "i386" __DEFAULT_YES_OPTIONS+=LLDB .else __DEFAULT_NO_OPTIONS+=LLDB .endif # LLVM lacks support for FreeBSD 64-bit atomic operations for ARMv4/ARMv5 .if ${__T} == "arm" BROKEN_OPTIONS+=LLDB .endif # GDB in base is generally less functional than GDB in ports. Ports GDB # sparc64 kernel support has not been tested. 
.if ${__T} == "sparc64" __DEFAULT_NO_OPTIONS+=GDB_LIBEXEC .else __DEFAULT_YES_OPTIONS+=GDB_LIBEXEC .endif # Only doing soft float API stuff on armv6 and armv7 .if ${__T} != "armv6" && ${__T} != "armv7" BROKEN_OPTIONS+=LIBSOFT .endif .if ${__T:Mmips*} BROKEN_OPTIONS+=SSP .endif # EFI doesn't exist on mips, powerpc, sparc or riscv. .if ${__T:Mmips*} || ${__T:Mpowerpc*} || ${__T:Msparc64} || ${__T:Mriscv*} BROKEN_OPTIONS+=EFI .endif # OFW is only for powerpc and sparc64, exclude others .if ${__T:Mpowerpc*} == "" && ${__T:Msparc64} == "" BROKEN_OPTIONS+=LOADER_OFW .endif # UBOOT is only for arm, mips and powerpc, exclude others .if ${__T:Marm*} == "" && ${__T:Mmips*} == "" && ${__T:Mpowerpc*} == "" BROKEN_OPTIONS+=LOADER_UBOOT .endif # GELI and Lua in loader currently cause boot failures on sparc64 and powerpc. # Further debugging is required -- probably they are just broken on big # endian systems generically (they jump to null pointers or try to read # crazy high addresses, which is typical of endianness problems). .if ${__T} == "sparc64" || ${__T:Mpowerpc*} BROKEN_OPTIONS+=LOADER_GELI LOADER_LUA .endif .if ${__T:Mmips64*} # profiling won't work on MIPS64 because there is only assembly for o32 BROKEN_OPTIONS+=PROFILE .endif .if ${__T} != "aarch64" && ${__T} != "amd64" && ${__T} != "i386" && \ ${__T} != "powerpc64" && ${__T} != "sparc64" BROKEN_OPTIONS+=CXGBETOOL BROKEN_OPTIONS+=MLX5TOOL .endif # HyperV is currently x86-only .if ${__T} != "amd64" && ${__T} != "i386" BROKEN_OPTIONS+=HYPERV .endif # NVME is only aarch64, x86 and powerpc64 .if ${__T} != "aarch64" && ${__T} != "amd64" && ${__T} != "i386" && ${__T} != "powerpc64" BROKEN_OPTIONS+=NVME .endif # Sparc64 need extra crt*.o files .if ${__T:Msparc64} BROKEN_OPTIONS+=BSD_CRTBEGIN .endif .if ${COMPILER_FEATURES:Mc++11} && (${__T} == "amd64" || ${__T} == "i386") __DEFAULT_YES_OPTIONS+=OPENMP .else __DEFAULT_NO_OPTIONS+=OPENMP .endif .include # # MK_* options that default to "yes" if the compiler is a C++11 compiler. # .for var in \ LIBCPLUSPLUS .if !defined(MK_${var}) .if ${COMPILER_FEATURES:Mc++11} .if defined(WITHOUT_${var}) MK_${var}:= no .else MK_${var}:= yes .endif .else .if defined(WITH_${var}) MK_${var}:= yes .else MK_${var}:= no .endif .endif .endif .endfor # # Force some options off if their dependencies are off. # Order is somewhat important. 
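(Editorial sketch; not part of src.opts.mk.) The forcing rules that follow are what turn one user-visible knob into a consistent set of MK_* values. Under the rules below, a hypothetical /etc/src.conf containing only WITHOUT_CLANG would come out roughly as:

        # User input (hypothetical /etc/src.conf)
        WITHOUT_CLANG=yes

        # Effective result after src.opts.mk (approximate)
        #   MK_CLANG=no
        #   MK_CLANG_EXTRAS=no  MK_CLANG_FULL=no  MK_LLVM_COV=no
        #   (forced off below because they depend on MK_CLANG)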
# .if !${COMPILER_FEATURES:Mc++11} MK_GOOGLETEST:= no MK_LLVM_LIBUNWIND:= no .endif .if ${MK_CAPSICUM} == "no" MK_CASPER:= no .endif .if ${MK_LIBPTHREAD} == "no" MK_LIBTHR:= no .endif .if ${MK_LDNS} == "no" MK_LDNS_UTILS:= no MK_UNBOUND:= no .endif .if ${MK_SOURCELESS} == "no" MK_SOURCELESS_HOST:= no MK_SOURCELESS_UCODE:= no .endif .if ${MK_CDDL} == "no" MK_ZFS:= no MK_LOADER_ZFS:= no MK_CTF:= no .endif .if ${MK_CRYPT} == "no" MK_OPENSSL:= no MK_OPENSSH:= no MK_KERBEROS:= no .endif .if ${MK_CXX} == "no" MK_CLANG:= no MK_GNUCXX:= no MK_TESTS:= no .endif .if ${MK_DIALOG} == "no" MK_BSDINSTALL:= no .endif .if ${MK_MAIL} == "no" MK_MAILWRAPPER:= no MK_SENDMAIL:= no MK_DMAGENT:= no .endif .if ${MK_NETGRAPH} == "no" MK_ATM:= no MK_BLUETOOTH:= no .endif .if ${MK_NLS} == "no" MK_NLS_CATALOGS:= no .endif .if ${MK_OPENSSL} == "no" MK_OPENSSH:= no MK_KERBEROS:= no .endif .if ${MK_PF} == "no" MK_AUTHPF:= no .endif .if ${MK_OFED} == "no" MK_OFED_EXTRA:= no .endif .if ${MK_PORTSNAP} == "no" # freebsd-update depends on phttpget from portsnap MK_FREEBSD_UPDATE:= no .endif .if ${MK_TESTS} == "no" MK_DTRACE_TESTS:= no .endif .if ${MK_TESTS_SUPPORT} == "no" MK_GOOGLETEST:= no .endif .if ${MK_ZONEINFO} == "no" MK_ZONEINFO_LEAPSECONDS_SUPPORT:= no MK_ZONEINFO_OLD_TIMEZONES_SUPPORT:= no .endif .if ${MK_CROSS_COMPILER} == "no" MK_BINUTILS_BOOTSTRAP:= no MK_CLANG_BOOTSTRAP:= no MK_ELFTOOLCHAIN_BOOTSTRAP:= no MK_GCC_BOOTSTRAP:= no MK_LLD_BOOTSTRAP:= no .endif .if ${MK_TOOLCHAIN} == "no" MK_BINUTILS:= no MK_CLANG:= no MK_GCC:= no MK_GDB:= no MK_INCLUDES:= no MK_LLD:= no MK_LLDB:= no .endif .if ${MK_CLANG} == "no" MK_CLANG_EXTRAS:= no MK_CLANG_FULL:= no MK_LLVM_COV:= no .endif .if ${MK_LOADER_VERIEXEC} == "no" MK_LOADER_VERIEXEC_PASS_MANIFEST := no .endif # # MK_* options whose default value depends on another option. # .for vv in \ GSSAPI/KERBEROS \ MAN_UTILS/MAN .if defined(WITH_${vv:H}) MK_${vv:H}:= yes .elif defined(WITHOUT_${vv:H}) MK_${vv:H}:= no .else MK_${vv:H}:= ${MK_${vv:T}} .endif .endfor # # Set defaults for the MK_*_SUPPORT variables. # .if !${COMPILER_FEATURES:Mc++11} MK_LLDB:= no .endif # gcc 4.8 and newer supports libc++, so suppress gnuc++ in that case. # while in theory we could build it with that, we don't want to do # that since it creates too much confusion for too little gain. # XXX: This is incomplete and needs X_COMPILER_TYPE/VERSION checks too # to prevent Makefile.inc1 from bootstrapping unneeded dependencies # and to support 'make delete-old' when supplying an external toolchain. .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 40800 MK_GNUCXX:=no MK_GCC:=no .endif .endif # !target(____) Index: projects/clang900-import/stand/efi/libefi/efipart.c =================================================================== --- projects/clang900-import/stand/efi/libefi/efipart.c (revision 352536) +++ projects/clang900-import/stand/efi/libefi/efipart.c (revision 352537) @@ -1,1067 +1,1156 @@ /*- * Copyright (c) 2010 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include static EFI_GUID blkio_guid = BLOCK_IO_PROTOCOL; static int efipart_initfd(void); static int efipart_initcd(void); static int efipart_inithd(void); static int efipart_strategy(void *, int, daddr_t, size_t, char *, size_t *); static int efipart_realstrategy(void *, int, daddr_t, size_t, char *, size_t *); static int efipart_open(struct open_file *, ...); static int efipart_close(struct open_file *); static int efipart_ioctl(struct open_file *, u_long, void *); static int efipart_printfd(int); static int efipart_printcd(int); static int efipart_printhd(int); /* EISA PNP ID's for floppy controllers */ #define PNP0604 0x604 #define PNP0700 0x700 #define PNP0701 0x701 +/* Bounce buffer max size */ +#define BIO_BUFFER_SIZE 0x4000 + struct devsw efipart_fddev = { .dv_name = "fd", .dv_type = DEVT_FD, .dv_init = efipart_initfd, .dv_strategy = efipart_strategy, .dv_open = efipart_open, .dv_close = efipart_close, .dv_ioctl = efipart_ioctl, .dv_print = efipart_printfd, .dv_cleanup = NULL }; struct devsw efipart_cddev = { .dv_name = "cd", .dv_type = DEVT_CD, .dv_init = efipart_initcd, .dv_strategy = efipart_strategy, .dv_open = efipart_open, .dv_close = efipart_close, .dv_ioctl = efipart_ioctl, .dv_print = efipart_printcd, .dv_cleanup = NULL }; struct devsw efipart_hddev = { .dv_name = "disk", .dv_type = DEVT_DISK, .dv_init = efipart_inithd, .dv_strategy = efipart_strategy, .dv_open = efipart_open, .dv_close = efipart_close, .dv_ioctl = efipart_ioctl, .dv_print = efipart_printhd, .dv_cleanup = NULL }; static pdinfo_list_t fdinfo = STAILQ_HEAD_INITIALIZER(fdinfo); static pdinfo_list_t cdinfo = STAILQ_HEAD_INITIALIZER(cdinfo); static pdinfo_list_t hdinfo = STAILQ_HEAD_INITIALIZER(hdinfo); /* * efipart_inithandles() is used to build up the pdinfo list from * block device handles. Then each devsw init callback is used to * pick items from pdinfo and move to proper device list. * In ideal world, we should end up with empty pdinfo once all * devsw initializers are called. 
*/ static pdinfo_list_t pdinfo = STAILQ_HEAD_INITIALIZER(pdinfo); pdinfo_list_t * efiblk_get_pdinfo_list(struct devsw *dev) { if (dev->dv_type == DEVT_DISK) return (&hdinfo); if (dev->dv_type == DEVT_CD) return (&cdinfo); if (dev->dv_type == DEVT_FD) return (&fdinfo); return (NULL); } /* XXX this gets called way way too often, investigate */ pdinfo_t * efiblk_get_pdinfo(struct devdesc *dev) { pdinfo_list_t *pdi; pdinfo_t *pd = NULL; pdi = efiblk_get_pdinfo_list(dev->d_dev); if (pdi == NULL) return (pd); STAILQ_FOREACH(pd, pdi, pd_link) { if (pd->pd_unit == dev->d_unit) return (pd); } return (pd); } pdinfo_t * efiblk_get_pdinfo_by_device_path(EFI_DEVICE_PATH *path) { EFI_HANDLE h; EFI_STATUS status; EFI_DEVICE_PATH *devp = path; status = BS->LocateDevicePath(&blkio_guid, &devp, &h); if (EFI_ERROR(status)) return (NULL); return (efiblk_get_pdinfo_by_handle(h)); } static bool same_handle(pdinfo_t *pd, EFI_HANDLE h) { return (pd->pd_handle == h || pd->pd_alias == h); } pdinfo_t * efiblk_get_pdinfo_by_handle(EFI_HANDLE h) { pdinfo_t *dp, *pp; /* * Check hard disks, then cd, then floppy */ STAILQ_FOREACH(dp, &hdinfo, pd_link) { if (same_handle(dp, h)) return (dp); STAILQ_FOREACH(pp, &dp->pd_part, pd_link) { if (same_handle(pp, h)) return (pp); } } STAILQ_FOREACH(dp, &cdinfo, pd_link) { if (same_handle(dp, h)) return (dp); STAILQ_FOREACH(pp, &dp->pd_part, pd_link) { if (same_handle(pp, h)) return (pp); } } STAILQ_FOREACH(dp, &fdinfo, pd_link) { if (same_handle(dp, h)) return (dp); } return (NULL); } static int efiblk_pdinfo_count(pdinfo_list_t *pdi) { pdinfo_t *pd; int i = 0; STAILQ_FOREACH(pd, pdi, pd_link) { i++; } return (i); } int efipart_inithandles(void) { unsigned i, nin; UINTN sz; EFI_HANDLE *hin; EFI_DEVICE_PATH *devpath; EFI_BLOCK_IO *blkio; EFI_STATUS status; pdinfo_t *pd; if (!STAILQ_EMPTY(&pdinfo)) return (0); sz = 0; hin = NULL; status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, hin); if (status == EFI_BUFFER_TOO_SMALL) { hin = malloc(sz); status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, hin); if (EFI_ERROR(status)) free(hin); } if (EFI_ERROR(status)) return (efi_status_to_errno(status)); nin = sz / sizeof(*hin); #ifdef EFIPART_DEBUG printf("%s: Got %d BLOCK IO MEDIA handle(s)\n", __func__, nin); #endif for (i = 0; i < nin; i++) { /* * Get devpath and open protocol. * We should not get errors here */ if ((devpath = efi_lookup_devpath(hin[i])) == NULL) continue; status = OpenProtocolByHandle(hin[i], &blkio_guid, (void **)&blkio); if (EFI_ERROR(status)) { printf("error %lu\n", EFI_ERROR_CODE(status)); continue; } /* * We assume the block size 512 or greater power of 2. * Also skip devices with block size > 64k (16 is max * ashift supported by zfs). * iPXE is known to insert stub BLOCK IO device with * BlockSize 1. */ if (blkio->Media->BlockSize < 512 || blkio->Media->BlockSize > (1 << 16) || !powerof2(blkio->Media->BlockSize)) { continue; } + /* Allowed values are 0, 1 and power of 2. */ + if (blkio->Media->IoAlign > 1 && + !powerof2(blkio->Media->IoAlign)) { + continue; + } + /* This is bad. 
*/ if ((pd = calloc(1, sizeof(*pd))) == NULL) { printf("efipart_inithandles: Out of memory.\n"); free(hin); return (ENOMEM); } STAILQ_INIT(&pd->pd_part); pd->pd_handle = hin[i]; pd->pd_devpath = devpath; pd->pd_blkio = blkio; STAILQ_INSERT_TAIL(&pdinfo, pd, pd_link); } free(hin); return (0); } static ACPI_HID_DEVICE_PATH * efipart_floppy(EFI_DEVICE_PATH *node) { ACPI_HID_DEVICE_PATH *acpi; if (DevicePathType(node) == ACPI_DEVICE_PATH && DevicePathSubType(node) == ACPI_DP) { acpi = (ACPI_HID_DEVICE_PATH *) node; if (acpi->HID == EISA_PNP_ID(PNP0604) || acpi->HID == EISA_PNP_ID(PNP0700) || acpi->HID == EISA_PNP_ID(PNP0701)) { return (acpi); } } return (NULL); } static pdinfo_t * efipart_find_parent(pdinfo_list_t *pdi, EFI_DEVICE_PATH *devpath) { pdinfo_t *pd; STAILQ_FOREACH(pd, pdi, pd_link) { if (efi_devpath_is_prefix(pd->pd_devpath, devpath)) return (pd); } return (NULL); } static int efipart_initfd(void) { EFI_DEVICE_PATH *node; ACPI_HID_DEVICE_PATH *acpi; pdinfo_t *parent, *fd; restart: STAILQ_FOREACH(fd, &pdinfo, pd_link) { if ((node = efi_devpath_last_node(fd->pd_devpath)) == NULL) continue; if ((acpi = efipart_floppy(node)) == NULL) continue; STAILQ_REMOVE(&pdinfo, fd, pdinfo, pd_link); parent = efipart_find_parent(&pdinfo, fd->pd_devpath); if (parent != NULL) { STAILQ_REMOVE(&pdinfo, parent, pdinfo, pd_link); parent->pd_alias = fd->pd_handle; parent->pd_unit = acpi->UID; free(fd); fd = parent; } else { fd->pd_unit = acpi->UID; } fd->pd_devsw = &efipart_fddev; STAILQ_INSERT_TAIL(&fdinfo, fd, pd_link); goto restart; } bcache_add_dev(efiblk_pdinfo_count(&fdinfo)); return (0); } /* * Add or update entries with new handle data. */ static void efipart_cdinfo_add(pdinfo_t *cd) { pdinfo_t *pd, *last; STAILQ_FOREACH(pd, &cdinfo, pd_link) { if (efi_devpath_is_prefix(pd->pd_devpath, cd->pd_devpath)) { last = STAILQ_LAST(&pd->pd_part, pdinfo, pd_link); if (last != NULL) cd->pd_unit = last->pd_unit + 1; else cd->pd_unit = 0; cd->pd_parent = pd; cd->pd_devsw = &efipart_cddev; STAILQ_INSERT_TAIL(&pd->pd_part, cd, pd_link); return; } } last = STAILQ_LAST(&cdinfo, pdinfo, pd_link); if (last != NULL) cd->pd_unit = last->pd_unit + 1; else cd->pd_unit = 0; cd->pd_parent = NULL; cd->pd_devsw = &efipart_cddev; STAILQ_INSERT_TAIL(&cdinfo, cd, pd_link); } static bool efipart_testcd(EFI_DEVICE_PATH *node, EFI_BLOCK_IO *blkio) { if (DevicePathType(node) == MEDIA_DEVICE_PATH && DevicePathSubType(node) == MEDIA_CDROM_DP) { return (true); } /* cd drive without the media. */ if (blkio->Media->RemovableMedia && !blkio->Media->MediaPresent) { return (true); } return (false); } static void efipart_updatecd(void) { EFI_DEVICE_PATH *devpath, *node; EFI_STATUS status; pdinfo_t *parent, *cd; restart: STAILQ_FOREACH(cd, &pdinfo, pd_link) { if ((node = efi_devpath_last_node(cd->pd_devpath)) == NULL) continue; if (efipart_floppy(node) != NULL) continue; /* Is parent of this device already registered? 
*/ parent = efipart_find_parent(&cdinfo, cd->pd_devpath); if (parent != NULL) { STAILQ_REMOVE(&pdinfo, cd, pdinfo, pd_link); efipart_cdinfo_add(cd); goto restart; } if (!efipart_testcd(node, cd->pd_blkio)) continue; /* Find parent and unlink both parent and cd from pdinfo */ STAILQ_REMOVE(&pdinfo, cd, pdinfo, pd_link); parent = efipart_find_parent(&pdinfo, cd->pd_devpath); if (parent != NULL) { STAILQ_REMOVE(&pdinfo, parent, pdinfo, pd_link); efipart_cdinfo_add(parent); } if (parent == NULL) parent = efipart_find_parent(&cdinfo, cd->pd_devpath); /* * If we come across a logical partition of subtype CDROM * it doesn't refer to the CD filesystem itself, but rather * to any usable El Torito boot image on it. In this case * we try to find the parent device and add that instead as * that will be the CD filesystem. */ if (DevicePathType(node) == MEDIA_DEVICE_PATH && DevicePathSubType(node) == MEDIA_CDROM_DP && parent == NULL) { parent = calloc(1, sizeof(*parent)); if (parent == NULL) { printf("efipart_updatecd: out of memory\n"); /* this device is lost but try again. */ free(cd); goto restart; } devpath = efi_devpath_trim(cd->pd_devpath); if (devpath == NULL) { printf("efipart_updatecd: out of memory\n"); /* this device is lost but try again. */ free(parent); free(cd); goto restart; } parent->pd_devpath = devpath; status = BS->LocateDevicePath(&blkio_guid, &parent->pd_devpath, &parent->pd_handle); free(devpath); if (EFI_ERROR(status)) { printf("efipart_updatecd: error %lu\n", EFI_ERROR_CODE(status)); free(parent); free(cd); goto restart; } parent->pd_devpath = efi_lookup_devpath(parent->pd_handle); efipart_cdinfo_add(parent); } efipart_cdinfo_add(cd); goto restart; } } static int efipart_initcd(void) { efipart_updatecd(); bcache_add_dev(efiblk_pdinfo_count(&cdinfo)); return (0); } static void efipart_hdinfo_add(pdinfo_t *hd, HARDDRIVE_DEVICE_PATH *node) { pdinfo_t *pd, *last; STAILQ_FOREACH(pd, &hdinfo, pd_link) { if (efi_devpath_is_prefix(pd->pd_devpath, hd->pd_devpath)) { /* Add the partition. */ hd->pd_unit = node->PartitionNumber; hd->pd_parent = pd; hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&pd->pd_part, hd, pd_link); return; } } last = STAILQ_LAST(&hdinfo, pdinfo, pd_link); if (last != NULL) hd->pd_unit = last->pd_unit + 1; else hd->pd_unit = 0; /* Add the disk. */ hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&hdinfo, hd, pd_link); } /* * The MEDIA_FILEPATH_DP has device name. * From U-Boot sources it looks like names are in the form * of typeN:M, where type is interface type, N is disk id * and M is partition id. */ static void efipart_hdinfo_add_filepath(pdinfo_t *hd, FILEPATH_DEVICE_PATH *node) { char *pathname, *p; int len; pdinfo_t *last; last = STAILQ_LAST(&hdinfo, pdinfo, pd_link); if (last != NULL) hd->pd_unit = last->pd_unit + 1; else hd->pd_unit = 0; /* FILEPATH_DEVICE_PATH has 0 terminated string */ len = ucs2len(node->PathName); if ((pathname = malloc(len + 1)) == NULL) { printf("Failed to add disk, out of memory\n"); free(hd); return; } cpy16to8(node->PathName, pathname, len + 1); p = strchr(pathname, ':'); /* * Assume we are receiving handles in order, first disk handle, * then partitions for this disk. If this assumption proves * false, this code would need update. 
*/ if (p == NULL) { /* no colon, add the disk */ hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&hdinfo, hd, pd_link); free(pathname); return; } p++; /* skip the colon */ errno = 0; hd->pd_unit = (int)strtol(p, NULL, 0); if (errno != 0) { printf("Bad unit number for partition \"%s\"\n", pathname); free(pathname); free(hd); return; } /* * We should have disk registered, if not, we are receiving * handles out of order, and this code should be reworked * to create "blank" disk for partition, and to find the * disk based on PathName compares. */ if (last == NULL) { printf("BUG: No disk for partition \"%s\"\n", pathname); free(pathname); free(hd); return; } /* Add the partition. */ hd->pd_parent = last; hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&last->pd_part, hd, pd_link); free(pathname); } static void efipart_updatehd(void) { EFI_DEVICE_PATH *devpath, *node; EFI_STATUS status; pdinfo_t *parent, *hd; restart: STAILQ_FOREACH(hd, &pdinfo, pd_link) { if ((node = efi_devpath_last_node(hd->pd_devpath)) == NULL) continue; if (efipart_floppy(node) != NULL) continue; if (efipart_testcd(node, hd->pd_blkio)) continue; if (DevicePathType(node) == HARDWARE_DEVICE_PATH && (DevicePathSubType(node) == HW_PCI_DP || DevicePathSubType(node) == HW_VENDOR_DP)) { STAILQ_REMOVE(&pdinfo, hd, pdinfo, pd_link); efipart_hdinfo_add(hd, NULL); goto restart; } if (DevicePathType(node) == MEDIA_DEVICE_PATH && DevicePathSubType(node) == MEDIA_FILEPATH_DP) { STAILQ_REMOVE(&pdinfo, hd, pdinfo, pd_link); efipart_hdinfo_add_filepath(hd, (FILEPATH_DEVICE_PATH *)node); goto restart; } STAILQ_REMOVE(&pdinfo, hd, pdinfo, pd_link); parent = efipart_find_parent(&pdinfo, hd->pd_devpath); if (parent != NULL) { STAILQ_REMOVE(&pdinfo, parent, pdinfo, pd_link); efipart_hdinfo_add(parent, NULL); } else { parent = efipart_find_parent(&hdinfo, hd->pd_devpath); } if (DevicePathType(node) == MEDIA_DEVICE_PATH && DevicePathSubType(node) == MEDIA_HARDDRIVE_DP && parent == NULL) { parent = calloc(1, sizeof(*parent)); if (parent == NULL) { printf("efipart_updatehd: out of memory\n"); /* this device is lost but try again. */ free(hd); goto restart; } devpath = efi_devpath_trim(hd->pd_devpath); if (devpath == NULL) { printf("efipart_updatehd: out of memory\n"); /* this device is lost but try again. */ free(parent); free(hd); goto restart; } parent->pd_devpath = devpath; status = BS->LocateDevicePath(&blkio_guid, &parent->pd_devpath, &parent->pd_handle); free(devpath); if (EFI_ERROR(status)) { printf("efipart_updatehd: error %lu\n", EFI_ERROR_CODE(status)); free(parent); free(hd); goto restart; } parent->pd_devpath = efi_lookup_devpath(&parent->pd_handle); efipart_hdinfo_add(parent, NULL); } efipart_hdinfo_add(hd, (HARDDRIVE_DEVICE_PATH *)node); goto restart; } } static int efipart_inithd(void) { efipart_updatehd(); bcache_add_dev(efiblk_pdinfo_count(&hdinfo)); return (0); } static int efipart_print_common(struct devsw *dev, pdinfo_list_t *pdlist, int verbose) { int ret = 0; EFI_BLOCK_IO *blkio; EFI_STATUS status; EFI_HANDLE h; pdinfo_t *pd; CHAR16 *text; struct disk_devdesc pd_dev; char line[80]; if (STAILQ_EMPTY(pdlist)) return (0); printf("%s devices:", dev->dv_name); if ((ret = pager_output("\n")) != 0) return (ret); STAILQ_FOREACH(pd, pdlist, pd_link) { h = pd->pd_handle; if (verbose) { /* Output the device path. 
*/ text = efi_devpath_name(efi_lookup_devpath(h)); if (text != NULL) { printf(" %S", text); efi_free_devpath_name(text); if ((ret = pager_output("\n")) != 0) break; } } snprintf(line, sizeof(line), " %s%d", dev->dv_name, pd->pd_unit); printf("%s:", line); status = OpenProtocolByHandle(h, &blkio_guid, (void **)&blkio); if (!EFI_ERROR(status)) { printf(" %llu", blkio->Media->LastBlock == 0? 0: (unsigned long long) (blkio->Media->LastBlock + 1)); if (blkio->Media->LastBlock != 0) { printf(" X %u", blkio->Media->BlockSize); } printf(" blocks"); if (blkio->Media->MediaPresent) { if (blkio->Media->RemovableMedia) printf(" (removable)"); } else { printf(" (no media)"); } if ((ret = pager_output("\n")) != 0) break; if (!blkio->Media->MediaPresent) continue; pd->pd_blkio = blkio; pd_dev.dd.d_dev = dev; pd_dev.dd.d_unit = pd->pd_unit; pd_dev.d_slice = D_SLICENONE; pd_dev.d_partition = D_PARTNONE; ret = disk_open(&pd_dev, blkio->Media->BlockSize * (blkio->Media->LastBlock + 1), blkio->Media->BlockSize); if (ret == 0) { ret = disk_print(&pd_dev, line, verbose); disk_close(&pd_dev); if (ret != 0) return (ret); } else { /* Do not fail from disk_open() */ ret = 0; } } else { if ((ret = pager_output("\n")) != 0) break; } } return (ret); } static int efipart_printfd(int verbose) { return (efipart_print_common(&efipart_fddev, &fdinfo, verbose)); } static int efipart_printcd(int verbose) { return (efipart_print_common(&efipart_cddev, &cdinfo, verbose)); } static int efipart_printhd(int verbose) { return (efipart_print_common(&efipart_hddev, &hdinfo, verbose)); } static int efipart_open(struct open_file *f, ...) { va_list args; struct disk_devdesc *dev; pdinfo_t *pd; EFI_BLOCK_IO *blkio; EFI_STATUS status; va_start(args, f); dev = va_arg(args, struct disk_devdesc *); va_end(args); if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EIO); if (pd->pd_blkio == NULL) { status = OpenProtocolByHandle(pd->pd_handle, &blkio_guid, (void **)&pd->pd_blkio); if (EFI_ERROR(status)) return (efi_status_to_errno(status)); } blkio = pd->pd_blkio; if (!blkio->Media->MediaPresent) return (EAGAIN); pd->pd_open++; if (pd->pd_bcache == NULL) pd->pd_bcache = bcache_allocate(); if (dev->dd.d_dev->dv_type == DEVT_DISK) { int rc; rc = disk_open(dev, blkio->Media->BlockSize * (blkio->Media->LastBlock + 1), blkio->Media->BlockSize); if (rc != 0) { pd->pd_open--; if (pd->pd_open == 0) { pd->pd_blkio = NULL; bcache_free(pd->pd_bcache); pd->pd_bcache = NULL; } } return (rc); } return (0); } static int efipart_close(struct open_file *f) { struct disk_devdesc *dev; pdinfo_t *pd; dev = (struct disk_devdesc *)(f->f_devdata); if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); pd->pd_open--; if (pd->pd_open == 0) { pd->pd_blkio = NULL; bcache_free(pd->pd_bcache); pd->pd_bcache = NULL; } if (dev->dd.d_dev->dv_type == DEVT_DISK) return (disk_close(dev)); return (0); } static int efipart_ioctl(struct open_file *f, u_long cmd, void *data) { struct disk_devdesc *dev; pdinfo_t *pd; int rc; dev = (struct disk_devdesc *)(f->f_devdata); if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); if (dev->dd.d_dev->dv_type == DEVT_DISK) { rc = disk_ioctl(dev, cmd, data); if (rc != ENOTTY) return (rc); } switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = pd->pd_blkio->Media->BlockSize; break; case DIOCGMEDIASIZE: *(uint64_t *)data = pd->pd_blkio->Media->BlockSize * 
(pd->pd_blkio->Media->LastBlock + 1); break; default: return (ENOTTY); } return (0); } /* * efipart_readwrite() * Internal equivalent of efipart_strategy(), which operates on the * media-native block size. This function expects all I/O requests * to be within the media size and returns an error if such is not * the case. */ static int efipart_readwrite(EFI_BLOCK_IO *blkio, int rw, daddr_t blk, daddr_t nblks, char *buf) { EFI_STATUS status; if (blkio == NULL) return (ENXIO); if (blk < 0 || blk > blkio->Media->LastBlock) return (EIO); if ((blk + nblks - 1) > blkio->Media->LastBlock) return (EIO); switch (rw & F_MASK) { case F_READ: status = blkio->ReadBlocks(blkio, blkio->Media->MediaId, blk, nblks * blkio->Media->BlockSize, buf); break; case F_WRITE: if (blkio->Media->ReadOnly) return (EROFS); status = blkio->WriteBlocks(blkio, blkio->Media->MediaId, blk, nblks * blkio->Media->BlockSize, buf); break; default: return (ENOSYS); } if (EFI_ERROR(status)) { printf("%s: rw=%d, blk=%ju size=%ju status=%lu\n", __func__, rw, blk, nblks, EFI_ERROR_CODE(status)); } return (efi_status_to_errno(status)); } static int efipart_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct disk_devdesc *dev; pdinfo_t *pd; dev = (struct disk_devdesc *)devdata; if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); if (pd->pd_blkio->Media->RemovableMedia && !pd->pd_blkio->Media->MediaPresent) return (ENXIO); bcd.dv_strategy = efipart_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = pd->pd_bcache; if (dev->dd.d_dev->dv_type == DEVT_DISK) { daddr_t offset; offset = dev->d_offset * pd->pd_blkio->Media->BlockSize; offset /= 512; return (bcache_strategy(&bcd, rw, blk + offset, size, buf, rsize)); } return (bcache_strategy(&bcd, rw, blk, size, buf, rsize)); } static int efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct disk_devdesc *)devdata; pdinfo_t *pd; EFI_BLOCK_IO *blkio; uint64_t off, disk_blocks, d_offset = 0; char *blkbuf; - size_t blkoff, blksz; - int error; + size_t blkoff, blksz, bio_size; + unsigned ioalign; + bool need_buf; + int rc; uint64_t diskend, readstart; if (dev == NULL || blk < 0) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); blkio = pd->pd_blkio; if (blkio == NULL) return (ENXIO); if (size == 0 || (size % 512) != 0) return (EIO); off = blk * 512; /* * Get disk blocks, this value is either for whole disk or for * partition. */ disk_blocks = 0; if (dev->dd.d_dev->dv_type == DEVT_DISK) { if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) { /* DIOCGMEDIASIZE does return bytes. */ disk_blocks /= blkio->Media->BlockSize; } d_offset = dev->d_offset; } if (disk_blocks == 0) disk_blocks = blkio->Media->LastBlock + 1 - d_offset; /* make sure we don't read past disk end */ if ((off + size) / blkio->Media->BlockSize > d_offset + disk_blocks) { diskend = d_offset + disk_blocks; readstart = off / blkio->Media->BlockSize; if (diskend <= readstart) { if (rsize != NULL) *rsize = 0; return (EIO); } size = diskend - readstart; size = size * blkio->Media->BlockSize; } - if (rsize != NULL) - *rsize = size; - + need_buf = true; + /* Do we need bounce buffer? 
*/ if ((size % blkio->Media->BlockSize == 0) && (off % blkio->Media->BlockSize == 0)) - return (efipart_readwrite(blkio, rw, - off / blkio->Media->BlockSize, - size / blkio->Media->BlockSize, buf)); + need_buf = false; - /* - * The buffer size is not a multiple of the media block size. - */ - blkbuf = malloc(blkio->Media->BlockSize); + /* Do we have IO alignment requirement? */ + ioalign = blkio->Media->IoAlign; + if (ioalign == 0) + ioalign++; + + if (ioalign > 1 && (uintptr_t)buf != roundup2((uintptr_t)buf, ioalign)) + need_buf = true; + + if (need_buf) { + for (bio_size = BIO_BUFFER_SIZE; bio_size > 0; + bio_size -= blkio->Media->BlockSize) { + blkbuf = memalign(ioalign, bio_size); + if (blkbuf != NULL) + break; + } + } else { + blkbuf = buf; + bio_size = size; + } + if (blkbuf == NULL) return (ENOMEM); - error = 0; + if (rsize != NULL) + *rsize = size; + + rc = 0; blk = off / blkio->Media->BlockSize; blkoff = off % blkio->Media->BlockSize; - blksz = blkio->Media->BlockSize - blkoff; + while (size > 0) { - error = efipart_readwrite(blkio, rw, blk, 1, blkbuf); - if (error) + size_t x = min(size, bio_size); + + if (x < blkio->Media->BlockSize) + x = 1; + else + x /= blkio->Media->BlockSize; + + switch (rw & F_MASK) { + case F_READ: + blksz = blkio->Media->BlockSize * x - blkoff; + if (size < blksz) + blksz = size; + + rc = efipart_readwrite(blkio, rw, blk, x, blkbuf); + if (rc != 0) + goto error; + + if (need_buf) + bcopy(blkbuf + blkoff, buf, blksz); break; - if (size < blksz) - blksz = size; - bcopy(blkbuf + blkoff, buf, blksz); + case F_WRITE: + rc = 0; + if (blkoff != 0) { + /* + * We got offset to sector, read 1 sector to + * blkbuf. + */ + x = 1; + blksz = blkio->Media->BlockSize - blkoff; + blksz = min(blksz, size); + rc = efipart_readwrite(blkio, F_READ, blk, x, + blkbuf); + } else if (size < blkio->Media->BlockSize) { + /* + * The remaining block is not full + * sector. Read 1 sector to blkbuf. + */ + x = 1; + blksz = size; + rc = efipart_readwrite(blkio, F_READ, blk, x, + blkbuf); + } else { + /* We can write full sector(s). */ + blksz = blkio->Media->BlockSize * x; + } + + if (rc != 0) + goto error; + /* + * Put your Data In, Put your Data out, + * Put your Data In, and shake it all about + */ + if (need_buf) + bcopy(buf, blkbuf + blkoff, blksz); + rc = efipart_readwrite(blkio, F_WRITE, blk, x, blkbuf); + if (rc != 0) + goto error; + break; + default: + /* DO NOTHING */ + rc = EROFS; + goto error; + } + + blkoff = 0; buf += blksz; size -= blksz; - blk++; - blkoff = 0; - blksz = blkio->Media->BlockSize; + blk += x; } - free(blkbuf); - return (error); +error: + if (rsize != NULL) + *rsize -= size; + + if (need_buf) + free(blkbuf); + return (rc); } Index: projects/clang900-import/stand/forth/loader.4th =================================================================== --- projects/clang900-import/stand/forth/loader.4th (revision 352536) +++ projects/clang900-import/stand/forth/loader.4th (revision 352537) @@ -1,263 +1,286 @@ \ Copyright (c) 1999 Daniel C. Sobral \ Copyright (c) 2011-2015 Devin Teske \ All rights reserved. \ \ Redistribution and use in source and binary forms, with or without \ modification, are permitted provided that the following conditions \ are met: \ 1. Redistributions of source code must retain the above copyright \ notice, this list of conditions and the following disclaimer. \ 2. 
Redistributions in binary form must reproduce the above copyright \ notice, this list of conditions and the following disclaimer in the \ documentation and/or other materials provided with the distribution. \ \ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND \ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE \ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE \ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE \ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL \ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS \ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) \ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT \ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY \ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF \ SUCH DAMAGE. \ \ $FreeBSD$ only forth definitions +\ provide u> if needed +s" u>" sfind [if] drop [else] + drop +: u> + 2dup u< if 2drop 0 exit then + swap u< if -1 exit then + 0 +; +[then] + +\ provide xemit if needed +s" xemit" sfind [if] drop [else] + drop +: xemit + dup 0x80 u< if emit exit then + 0 swap 0x3F + begin 2dup u> while + 2/ >r dup 0x3F and 0x80 or swap 6 rshift r> + repeat 0x7F xor 2* or + begin dup 0x80 u< 0= while emit repeat drop +; +[then] + s" arch-i386" environment? [if] [if] s" loader_version" environment? [if] 11 < [if] .( Loader version 1.1+ required) cr abort [then] [else] .( Could not get loader version!) cr abort [then] [then] [then] 256 dictthreshold ! \ 256 cells minimum free space 2048 dictincrease ! \ 2048 additional cells each time include /boot/support.4th include /boot/color.4th include /boot/delay.4th include /boot/check-password.4th only forth definitions : bootmsg ( -- ) loader_color? dup ( -- bool bool ) if 7 fg 4 bg then ." Booting..." if me then cr ; : try-menu-unset \ menu-unset may not be present s" beastie_disable" getenv dup -1 <> if s" YES" compare-insensitive 0= if exit then else drop then s" menu-unset" sfind if execute else drop then s" menusets-unset" sfind if execute else drop then ; only forth also support-functions also builtins definitions : boot 0= if ( interpreted ) get_arguments then \ Unload only if a path was passed dup if >r over r> swap c@ [char] - <> if 0 1 unload drop else s" kernelname" getenv? if ( a kernel has been loaded ) try-menu-unset bootmsg 1 boot exit then load_kernel_and_modules ?dup if exit then try-menu-unset bootmsg 0 1 boot exit then else s" kernelname" getenv? if ( a kernel has been loaded ) try-menu-unset bootmsg 1 boot exit then load_kernel_and_modules ?dup if exit then try-menu-unset bootmsg 0 1 boot exit then load_kernel_and_modules ?dup 0= if bootmsg 0 1 boot then ; \ ***** boot-conf \ \ Prepares to boot as specified by loaded configuration files. : boot-conf 0= if ( interpreted ) get_arguments then 0 1 unload drop load_kernel_and_modules ?dup 0= if 0 1 autoboot then ; also forth definitions previous builtin: boot builtin: boot-conf only forth definitions also support-functions \ ***** start \ \ Initializes support.4th global variables, sets loader_conf_files, \ processes conf files, and, if any one such file was successfully \ read to the end, loads kernel and modules. 
: start ( -- ) ( throws: abort & user-defined ) s" /boot/defaults/loader.conf" initialize include_conf_files include_nextboot_file \ If the user defined a post-initialize hook, call it now s" post-initialize" sfind if execute else drop then \ Will *NOT* try to load kernel and modules if no configuration file \ was successfully loaded! any_conf_read? if s" loader_delay" getenv -1 = if load_xen_throw load_kernel load_modules else drop ." Loading Kernel and Modules (Ctrl-C to Abort)" cr s" also support-functions" evaluate s" set delay_command='load_xen_throw load_kernel load_modules'" evaluate s" set delay_showdots" evaluate delay_execute then then ; \ ***** initialize \ \ Overrides support.4th initialization word with one that does \ everything start one does, short of loading the kernel and \ modules. Returns a flag. : initialize ( -- flag ) s" /boot/defaults/loader.conf" initialize include_conf_files include_nextboot_file \ If the user defined a post-initialize hook, call it now s" post-initialize" sfind if execute else drop then any_conf_read? ; \ ***** read-conf \ \ Read a configuration file, whose name was specified on the command \ line, if interpreted, or given on the stack, if compiled in. : (read-conf) ( addr len -- ) conf_files string= include_conf_files \ Will recurse on new loader_conf_files definitions ; : read-conf ( | addr len -- ) ( throws: abort & user-defined ) state @ if \ Compiling postpone (read-conf) else \ Interpreting bl parse (read-conf) then ; immediate \ show, enable, disable, toggle module loading. They all take module from \ the next word : set-module-flag ( module_addr val -- ) \ set and print flag over module.flag ! dup module.name strtype module.flag @ if ." will be loaded" else ." will not be loaded" then cr ; : enable-module find-module ?dup if true set-module-flag then ; : disable-module find-module ?dup if false set-module-flag then ; : toggle-module find-module ?dup if dup module.flag @ 0= set-module-flag then ; \ ***** show-module \ \ Show loading information about a module. : show-module ( -- ) find-module ?dup if show-one-module then ; \ Words to be used inside configuration files : retry false ; \ For use in load error commands : ignore true ; \ For use in load error commands \ Return to strict forth vocabulary : #type over - >r type r> spaces ; : .? 2 spaces 2swap 15 #type 2 spaces type cr ; \ Execute the ? command to print all the commands defined in \ C, then list the ones we support here. Please note that this \ doesn't use pager_* routines that the C implementation of ? \ does, so these will always appear, even if you stop early \ there. And they may cause the commands to scroll off the \ screen if the number of commands modulus LINES is close \ to LINEs.... : ? ['] ? execute s" boot-conf" s" load kernel and modules, then autoboot" .? s" read-conf" s" read a configuration file" .? s" enable-module" s" enable loading of a module" .? s" disable-module" s" disable loading of a module" .? s" toggle-module" s" toggle loading of a module" .? s" show-module" s" show module load data" .? s" try-include" s" try to load/interpret files" .? ; : try-include ( -- ) \ see loader.4th(8) ['] include ( -- xt ) \ get the execution token of `include' catch ( xt -- exception# | 0 ) if \ failed LF parse ( c -- s-addr/u ) 2drop \ advance >in to EOL (drop data) \ ... 
prevents words unused by `include' from being interpreted then ; immediate \ interpret immediately for access to `source' (aka tib) only forth definitions Index: projects/clang900-import/stand/libsa/stand.h =================================================================== --- projects/clang900-import/stand/libsa/stand.h (revision 352536) +++ projects/clang900-import/stand/libsa/stand.h (revision 352537) @@ -1,449 +1,453 @@ /* * Copyright (c) 1998 Michael Smith. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * From $NetBSD: stand.h,v 1.22 1997/06/26 19:17:40 drochner Exp $ */ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)stand.h 8.1 (Berkeley) 6/11/93 */ #ifndef STAND_H #define STAND_H #include #include #include #include /* this header intentionally exports NULL from */ #include #define strcoll(a, b) strcmp((a), (b)) #define CHK(fmt, args...) printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args) #define PCHK(fmt, args...) {printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args); getchar();} #include /* special stand error codes */ #define EADAPT (ELAST+1) /* bad adaptor */ #define ECTLR (ELAST+2) /* bad controller */ #define EUNIT (ELAST+3) /* bad unit */ #define ESLICE (ELAST+4) /* bad slice */ #define EPART (ELAST+5) /* bad partition */ #define ERDLAB (ELAST+6) /* can't read disk label */ #define EUNLAB (ELAST+7) /* unlabeled disk */ #define EOFFSET (ELAST+8) /* relative seek not supported */ #define ESALAST (ELAST+8) /* */ /* Partial signal emulation for sig_atomic_t */ #include struct open_file; /* * This structure is used to define file system operations in a file system * independent way. * * XXX note that filesystem providers should export a pointer to their fs_ops * struct, so that consumers can reference this and thus include the * filesystems that they require. */ struct fs_ops { const char *fs_name; int (*fo_open)(const char *path, struct open_file *f); int (*fo_close)(struct open_file *f); int (*fo_read)(struct open_file *f, void *buf, size_t size, size_t *resid); int (*fo_write)(struct open_file *f, const void *buf, size_t size, size_t *resid); off_t (*fo_seek)(struct open_file *f, off_t offset, int where); int (*fo_stat)(struct open_file *f, struct stat *sb); int (*fo_readdir)(struct open_file *f, struct dirent *d); }; /* * libstand-supplied filesystems */ extern struct fs_ops ufs_fsops; extern struct fs_ops tftp_fsops; extern struct fs_ops nfs_fsops; extern struct fs_ops cd9660_fsops; extern struct fs_ops gzipfs_fsops; extern struct fs_ops bzipfs_fsops; extern struct fs_ops dosfs_fsops; extern struct fs_ops ext2fs_fsops; extern struct fs_ops splitfs_fsops; extern struct fs_ops pkgfs_fsops; extern struct fs_ops efihttp_fsops; /* where values for lseek(2) */ #define SEEK_SET 0 /* set file offset to offset */ #define SEEK_CUR 1 /* set file offset to current plus offset */ #define SEEK_END 2 /* set file offset to EOF plus offset */ /* * Device switch */ struct devsw { const char dv_name[8]; int dv_type; /* opaque type constant, arch-dependant */ #define DEVT_NONE 0 #define DEVT_DISK 1 #define DEVT_NET 2 #define DEVT_CD 3 #define DEVT_ZFS 4 #define DEVT_FD 5 int (*dv_init)(void); /* early probe call */ int (*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize); int (*dv_open)(struct open_file *f, ...); int (*dv_close)(struct open_file *f); int (*dv_ioctl)(struct open_file *f, u_long cmd, void *data); int (*dv_print)(int verbose); /* print device information */ void (*dv_cleanup)(void); }; /* * libstand-supplied device switch */ extern struct devsw netdev; extern int errno; /* * Generic device specifier; architecture-dependent * versions may be larger, but should be allowed to * overlap. 
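The "overlap" remark above is the key to the descriptor layering used throughout the loader: a device-class descriptor embeds struct devdesc as its first member, so a pointer to the larger structure can be passed wherever a struct devdesc * is expected, which is what efipart_open() and friends do with their (struct devdesc *)dev casts. A minimal sketch, with the field types assumed here for illustration only (the real disk descriptor lives in the loader's disk support headers):

struct example_disk_devdesc {
	struct devdesc	dd;		/* generic descriptor; must come first */
	int		d_slice;	/* D_SLICENONE when unused */
	int		d_partition;	/* D_PARTNONE when unused */
	uint64_t	d_offset;	/* partition start, in media blocks */
};

/* Valid only because the generic part is the first member. */
#define	EXAMPLE_DEVDESC(p)	((struct devdesc *)(p))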
*/ struct devdesc { struct devsw *d_dev; int d_unit; void *d_opendata; }; struct open_file { int f_flags; /* see F_* below */ struct devsw *f_dev; /* pointer to device operations */ void *f_devdata; /* device specific data */ struct fs_ops *f_ops; /* pointer to file system operations */ void *f_fsdata; /* file system specific data */ off_t f_offset; /* current file offset */ char *f_rabuf; /* readahead buffer pointer */ size_t f_ralen; /* valid data in readahead buffer */ off_t f_raoffset; /* consumer offset in readahead buffer */ #define SOPEN_RASIZE 512 }; #define SOPEN_MAX 64 extern struct open_file files[]; /* f_flags values */ #define F_READ 0x0001 /* file opened for reading */ #define F_WRITE 0x0002 /* file opened for writing */ #define F_RAW 0x0004 /* raw device open - no file system */ #define F_NODEV 0x0008 /* network open - no device */ #define F_MASK 0xFFFF /* Mode modifier for strategy() */ #define F_NORA (0x01 << 16) /* Disable Read-Ahead */ #define isascii(c) (((c) & ~0x7F) == 0) static __inline int isupper(int c) { return c >= 'A' && c <= 'Z'; } static __inline int islower(int c) { return c >= 'a' && c <= 'z'; } static __inline int isspace(int c) { return c == ' ' || (c >= 0x9 && c <= 0xd); } static __inline int isdigit(int c) { return c >= '0' && c <= '9'; } static __inline int isxdigit(int c) { return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } static __inline int isalpha(int c) { return isupper(c) || islower(c); } static __inline int isalnum(int c) { return isalpha(c) || isdigit(c); } static __inline int iscntrl(int c) { return (c >= 0 && c < ' ') || c == 127; } static __inline int isgraph(int c) { return c >= '!' && c <= '~'; } static __inline int ispunct(int c) { return (c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~'); } static __inline int toupper(int c) { return islower(c) ? c - 'a' + 'A' : c; } static __inline int tolower(int c) { return isupper(c) ? c - 'A' + 'a' : c; } /* sbrk emulation */ extern void setheap(void *base, void *top); extern char *sbrk(int incr); -extern void *reallocf(void *ptr, size_t size); -extern void mallocstats(void); - extern int printf(const char *fmt, ...) __printflike(1, 2); extern int asprintf(char **buf, const char *cfmt, ...) __printflike(2, 3); extern int sprintf(char *buf, const char *cfmt, ...) __printflike(2, 3); extern int snprintf(char *buf, size_t size, const char *cfmt, ...) 
__printflike(3, 4); extern int vprintf(const char *fmt, __va_list); extern int vsprintf(char *buf, const char *cfmt, __va_list); extern int vsnprintf(char *buf, size_t size, const char *cfmt, __va_list); extern void twiddle(u_int callerdiv); extern void twiddle_divisor(u_int globaldiv); extern void ngets(char *, int); #define gets(x) ngets((x), 0) extern int fgetstr(char *buf, int size, int fd); extern int open(const char *, int); #define O_RDONLY 0x0 #define O_WRONLY 0x1 #define O_RDWR 0x2 #define O_ACCMODE 0x3 /* NOT IMPLEMENTED */ #define O_CREAT 0x0200 /* create if nonexistent */ #define O_TRUNC 0x0400 /* truncate to zero length */ extern int close(int); extern void closeall(void); extern ssize_t read(int, void *, size_t); extern ssize_t write(int, const void *, size_t); extern struct dirent *readdirfd(int); extern void srandom(unsigned int); extern long random(void); /* imports from stdlib, locally modified */ extern char *optarg; /* getopt(3) external variables */ extern int optind, opterr, optopt, optreset; extern int getopt(int, char * const [], const char *); /* pager.c */ extern void pager_open(void); extern void pager_close(void); extern int pager_output(const char *lines); extern int pager_file(const char *fname); /* No signal state to preserve */ #define setjmp _setjmp #define longjmp _longjmp /* environment.c */ #define EV_DYNAMIC (1<<0) /* value was dynamically allocated, free if changed/unset */ #define EV_VOLATILE (1<<1) /* value is volatile, make a copy of it */ #define EV_NOHOOK (1<<2) /* don't call hook when setting */ struct env_var; typedef char *(ev_format_t)(struct env_var *ev); typedef int (ev_sethook_t)(struct env_var *ev, int flags, const void *value); typedef int (ev_unsethook_t)(struct env_var *ev); struct env_var { char *ev_name; int ev_flags; void *ev_value; ev_sethook_t *ev_sethook; ev_unsethook_t *ev_unsethook; struct env_var *ev_next, *ev_prev; }; extern struct env_var *environ; extern struct env_var *env_getenv(const char *name); extern int env_setenv(const char *name, int flags, const void *value, ev_sethook_t sethook, ev_unsethook_t unsethook); extern char *getenv(const char *name); extern int setenv(const char *name, const char *value, int overwrite); extern int putenv(char *string); extern int unsetenv(const char *name); extern ev_sethook_t env_noset; /* refuse set operation */ extern ev_unsethook_t env_nounset; /* refuse unset operation */ /* stdlib.h routines */ extern int abs(int a); extern void abort(void) __dead2; extern long strtol(const char * __restrict, char ** __restrict, int); extern long long strtoll(const char * __restrict, char ** __restrict, int); extern unsigned long strtoul(const char * __restrict, char ** __restrict, int); extern unsigned long long strtoull(const char * __restrict, char ** __restrict, int); /* BCD conversions (undocumented) */ extern u_char const bcd2bin_data[]; extern u_char const bin2bcd_data[]; extern char const hex2ascii_data[]; #define bcd2bin(bcd) (bcd2bin_data[bcd]) #define bin2bcd(bin) (bin2bcd_data[bin]) #define hex2ascii(hex) (hex2ascii_data[hex]) #define validbcd(bcd) (bcd == 0 || (bcd > 0 && bcd <= 0x99 && bcd2bin_data[bcd] != 0)) /* min/max (undocumented) */ static __inline int imax(int a, int b) { return (a > b ? a : b); } static __inline int imin(int a, int b) { return (a < b ? a : b); } static __inline long lmax(long a, long b) { return (a > b ? a : b); } static __inline long lmin(long a, long b) { return (a < b ? a : b); } static __inline u_int max(u_int a, u_int b) { return (a > b ? 
a : b); } static __inline u_int min(u_int a, u_int b) { return (a < b ? a : b); } static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); } static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? a : b); } static __inline u_long ulmax(u_long a, u_long b) { return (a > b ? a : b); } static __inline u_long ulmin(u_long a, u_long b) { return (a < b ? a : b); } /* null functions for device/filesystem switches (undocumented) */ extern int nodev(void); extern int noioctl(struct open_file *, u_long, void *); extern void nullsys(void); extern int null_open(const char *path, struct open_file *f); extern int null_close(struct open_file *f); extern int null_read(struct open_file *f, void *buf, size_t size, size_t *resid); extern int null_write(struct open_file *f, const void *buf, size_t size, size_t *resid); extern off_t null_seek(struct open_file *f, off_t offset, int where); extern int null_stat(struct open_file *f, struct stat *sb); extern int null_readdir(struct open_file *f, struct dirent *d); /* * Machine dependent functions and data, must be provided or stubbed by * the consumer */ extern void exit(int) __dead2; extern int getchar(void); extern int ischar(void); extern void putchar(int); extern int devopen(struct open_file *, const char *, const char **); extern int devclose(struct open_file *f); extern void panic(const char *, ...) __dead2 __printflike(1, 2); extern void panic_action(void) __weak_symbol __dead2; extern time_t getsecs(void); extern struct fs_ops *file_system[]; extern struct fs_ops *exclusive_file_system; extern struct devsw *devsw[]; /* * Expose byteorder(3) functions. */ #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED extern uint32_t htonl(uint32_t); extern uint16_t htons(uint16_t); extern uint32_t ntohl(uint32_t); extern uint16_t ntohs(uint16_t); #endif #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif void *Malloc(size_t, const char *, int); +void *Memalign(size_t, size_t, const char *, int); void *Calloc(size_t, size_t, const char *, int); void *Realloc(void *, size_t, const char *, int); +void *Reallocf(void *, size_t, const char *, int); void Free(void *, const char *, int); +extern void mallocstats(void); #ifdef DEBUG_MALLOC #define malloc(x) Malloc(x, __FILE__, __LINE__) +#define memalign(x, y) Memalign(x, y, __FILE__, __LINE__) #define calloc(x, y) Calloc(x, y, __FILE__, __LINE__) #define free(x) Free(x, __FILE__, __LINE__) #define realloc(x, y) Realloc(x, y, __FILE__, __LINE__) +#define reallocf(x, y) Reallocf(x, y, __FILE__, __LINE__) #else #define malloc(x) Malloc(x, NULL, 0) +#define memalign(x, y) Memalign(x, y, NULL, 0) #define calloc(x, y) Calloc(x, y, NULL, 0) #define free(x) Free(x, NULL, 0) #define realloc(x, y) Realloc(x, y, NULL, 0) +#define reallocf(x, y) Reallocf(x, y, NULL, 0) #endif #endif /* STAND_H */ Index: projects/clang900-import/stand/libsa/zalloc.c =================================================================== --- projects/clang900-import/stand/libsa/zalloc.c (revision 352536) +++ projects/clang900-import/stand/libsa/zalloc.c (revision 352537) @@ -1,316 +1,338 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); +#include + /* - * LIB/MEMORY/ZALLOC.C - self contained low-overhead memory pool/allocation + * LIB/MEMORY/ZALLOC.C - self contained low-overhead memory pool/allocation * subsystem * - * This subsystem implements memory pools and memory allocation + * This subsystem implements memory pools and memory allocation * routines. * * Pools are managed via a linked list of 'free' areas. Allocating * memory creates holes in the freelist, freeing memory fills them. * Since the freelist consists only of free memory areas, it is possible * to allocate the entire pool without incuring any structural overhead. * * The system works best when allocating similarly-sized chunks of - * memory. Care must be taken to avoid fragmentation when + * memory. Care must be taken to avoid fragmentation when * allocating/deallocating dissimilar chunks. * * When a memory pool is first allocated, the entire pool is marked as * allocated. This is done mainly because we do not want to modify any * portion of a pool's data area until we are given permission. The * caller must explicitly deallocate portions of the pool to make them * available. * * z[n]xalloc() works like z[n]alloc() but the allocation is made from - * within the specified address range. If the segment could not be + * within the specified address range. If the segment could not be * allocated, NULL is returned. WARNING! The address range will be * aligned to an 8 or 16 byte boundry depending on the cpu so if you * give an unaligned address range, unexpected results may occur. * * If a standard allocation fails, the reclaim function will be called * to recover some space. This usually causes other portions of the * same pool to be released. Memory allocations at this low level * should not block but you can do that too in your reclaim function * if you want. Reclaim does not function when z[n]xalloc() is used, * only for z[n]alloc(). * * Allocation and frees of 0 bytes are valid operations. */ #include "zalloc_defs.h" /* * Objects in the pool must be aligned to at least the size of struct MemNode. * They must also be aligned to MALLOCALIGN, which should normally be larger * than the struct, so assert that to be so at compile time. 
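The align argument threaded through this file exists mainly so that memalign() in libsa can hand out buffers satisfying a device's I/O alignment requirement; that is how the reworked efipart_realstrategy() earlier in this change obtains its bounce buffer. A minimal usage sketch, assuming a hypothetical 4 KiB requirement (the function name and values below are illustrative, not part of this change):

#include "stand.h"

void *
example_bounce_buffer(size_t io_size)
{
	size_t ioalign = 4096;	/* e.g. a device's Media->IoAlign */

	/* memalign(alignment, size); an alignment of 0 or 1 means "don't care". */
	return (memalign(ioalign, io_size));
}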
*/ typedef char assert_align[(sizeof(struct MemNode) <= MALLOCALIGN) ? 1 : -1]; #define MEMNODE_SIZE_MASK MALLOCALIGN_MASK /* * znalloc() - allocate memory (without zeroing) from pool. Call reclaim * and retry if appropriate, return NULL if unable to allocate * memory. */ void * -znalloc(MemPool *mp, uintptr_t bytes) +znalloc(MemPool *mp, uintptr_t bytes, size_t align) { - /* - * align according to pool object size (can be 0). This is - * inclusive of the MEMNODE_SIZE_MASK minimum alignment. - * - */ - bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; - - if (bytes == 0) - return((void *)-1); - - /* - * locate freelist entry big enough to hold the object. If all objects - * are the same size, this is a constant-time function. - */ - - if (bytes <= mp->mp_Size - mp->mp_Used) { MemNode **pmn; MemNode *mn; - for (pmn = &mp->mp_First; (mn=*pmn) != NULL; pmn = &mn->mr_Next) { - if (bytes > mn->mr_Bytes) - continue; + /* + * align according to pool object size (can be 0). This is + * inclusive of the MEMNODE_SIZE_MASK minimum alignment. + * + */ + bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; - /* - * Cut a chunk of memory out of the beginning of this - * block and fixup the link appropriately. - */ + if (bytes == 0) + return ((void *)-1); - { + /* + * locate freelist entry big enough to hold the object. If all objects + * are the same size, this is a constant-time function. + */ + + if (bytes > mp->mp_Size - mp->mp_Used) + return (NULL); + + for (pmn = &mp->mp_First; (mn = *pmn) != NULL; pmn = &mn->mr_Next) { char *ptr = (char *)mn; + uintptr_t dptr; + char *aligned; + size_t extra; + dptr = (uintptr_t)(ptr + MALLOCALIGN); /* pointer to data */ + aligned = (char *)(roundup2(dptr, align) - MALLOCALIGN); + extra = aligned - ptr; + + if (bytes + extra > mn->mr_Bytes) + continue; + + /* + * Cut extra from head and create new memory node from reminder. + */ + + if (extra != 0) { + MemNode *new; + + new = (MemNode *)aligned; + new->mr_Next = mn->mr_Next; + new->mr_Bytes = mn->mr_Bytes - extra; + + /* And update current memory node */ + mn->mr_Bytes = extra; + mn->mr_Next = new; + /* In next iteration, we will get our aligned address */ + continue; + } + + /* + * Cut a chunk of memory out of the beginning of this + * block and fixup the link appropriately. + */ + if (mn->mr_Bytes == bytes) { - *pmn = mn->mr_Next; + *pmn = mn->mr_Next; } else { - mn = (MemNode *)((char *)mn + bytes); - mn->mr_Next = ((MemNode *)ptr)->mr_Next; - mn->mr_Bytes = ((MemNode *)ptr)->mr_Bytes - bytes; - *pmn = mn; + mn = (MemNode *)((char *)mn + bytes); + mn->mr_Next = ((MemNode *)ptr)->mr_Next; + mn->mr_Bytes = ((MemNode *)ptr)->mr_Bytes - bytes; + *pmn = mn; } mp->mp_Used += bytes; return(ptr); - } } - } - /* - * Memory pool is full, return NULL. - */ + /* + * Memory pool is full, return NULL. + */ - return(NULL); + return (NULL); } /* * zfree() - free previously allocated memory */ void zfree(MemPool *mp, void *ptr, uintptr_t bytes) { - /* - * align according to pool object size (can be 0). This is - * inclusive of the MEMNODE_SIZE_MASK minimum alignment. - */ - bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; + MemNode **pmn; + MemNode *mn; - if (bytes == 0) - return; + /* + * align according to pool object size (can be 0). This is + * inclusive of the MEMNODE_SIZE_MASK minimum alignment. 
+ */ + bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; - /* - * panic if illegal pointer - */ + if (bytes == 0) + return; - if ((char *)ptr < (char *)mp->mp_Base || - (char *)ptr + bytes > (char *)mp->mp_End || - ((uintptr_t)ptr & MEMNODE_SIZE_MASK) != 0) - panic("zfree(%p,%ju): wild pointer", ptr, (uintmax_t)bytes); + /* + * panic if illegal pointer + */ - /* - * free the segment - */ + if ((char *)ptr < (char *)mp->mp_Base || + (char *)ptr + bytes > (char *)mp->mp_End || + ((uintptr_t)ptr & MEMNODE_SIZE_MASK) != 0) + panic("zfree(%p,%ju): wild pointer", ptr, (uintmax_t)bytes); - { - MemNode **pmn; - MemNode *mn; - + /* + * free the segment + */ mp->mp_Used -= bytes; for (pmn = &mp->mp_First; (mn = *pmn) != NULL; pmn = &mn->mr_Next) { - /* - * If area between last node and current node - * - check range - * - check merge with next area - * - check merge with previous area - */ - if ((char *)ptr <= (char *)mn) { /* - * range check + * If area between last node and current node + * - check range + * - check merge with next area + * - check merge with previous area */ - if ((char *)ptr + bytes > (char *)mn) { - panic("zfree(%p,%ju): corrupt memlist1", ptr, - (uintmax_t)bytes); - } + if ((char *)ptr <= (char *)mn) { + /* + * range check + */ + if ((char *)ptr + bytes > (char *)mn) { + panic("zfree(%p,%ju): corrupt memlist1", ptr, + (uintmax_t)bytes); + } - /* - * merge against next area or create independant area - */ + /* + * merge against next area or create independant area + */ - if ((char *)ptr + bytes == (char *)mn) { - ((MemNode *)ptr)->mr_Next = mn->mr_Next; - ((MemNode *)ptr)->mr_Bytes= bytes + mn->mr_Bytes; - } else { - ((MemNode *)ptr)->mr_Next = mn; - ((MemNode *)ptr)->mr_Bytes= bytes; - } - *pmn = mn = (MemNode *)ptr; + if ((char *)ptr + bytes == (char *)mn) { + ((MemNode *)ptr)->mr_Next = mn->mr_Next; + ((MemNode *)ptr)->mr_Bytes = + bytes + mn->mr_Bytes; + } else { + ((MemNode *)ptr)->mr_Next = mn; + ((MemNode *)ptr)->mr_Bytes = bytes; + } + *pmn = mn = (MemNode *)ptr; - /* - * merge against previous area (if there is a previous - * area). - */ + /* + * merge against previous area (if there is a previous + * area). + */ - if (pmn != &mp->mp_First) { - if ((char*)pmn + ((MemNode*)pmn)->mr_Bytes == (char*)ptr) { - ((MemNode *)pmn)->mr_Next = mn->mr_Next; - ((MemNode *)pmn)->mr_Bytes += mn->mr_Bytes; - mn = (MemNode *)pmn; - } + if (pmn != &mp->mp_First) { + if ((char *)pmn + ((MemNode*)pmn)->mr_Bytes == + (char *)ptr) { + ((MemNode *)pmn)->mr_Next = mn->mr_Next; + ((MemNode *)pmn)->mr_Bytes += + mn->mr_Bytes; + mn = (MemNode *)pmn; + } + } + return; } - return; - /* NOT REACHED */ - } - if ((char *)ptr < (char *)mn + mn->mr_Bytes) { - panic("zfree(%p,%ju): corrupt memlist2", ptr, - (uintmax_t)bytes); - } + if ((char *)ptr < (char *)mn + mn->mr_Bytes) { + panic("zfree(%p,%ju): corrupt memlist2", ptr, + (uintmax_t)bytes); + } } /* * We are beyond the last MemNode, append new MemNode. Merge against * previous area if possible. 
*/ - if (pmn == &mp->mp_First || - (char *)pmn + ((MemNode *)pmn)->mr_Bytes != (char *)ptr - ) { - ((MemNode *)ptr)->mr_Next = NULL; - ((MemNode *)ptr)->mr_Bytes = bytes; - *pmn = (MemNode *)ptr; - mn = (MemNode *)ptr; + if (pmn == &mp->mp_First || + (char *)pmn + ((MemNode *)pmn)->mr_Bytes != (char *)ptr) { + ((MemNode *)ptr)->mr_Next = NULL; + ((MemNode *)ptr)->mr_Bytes = bytes; + *pmn = (MemNode *)ptr; + mn = (MemNode *)ptr; } else { - ((MemNode *)pmn)->mr_Bytes += bytes; - mn = (MemNode *)pmn; + ((MemNode *)pmn)->mr_Bytes += bytes; + mn = (MemNode *)pmn; } - } } /* * zextendPool() - extend memory pool to cover additional space. * * Note: the added memory starts out as allocated, you * must free it to make it available to the memory subsystem. * * Note: mp_Size may not reflect (mp_End - mp_Base) range * due to other parts of the system doing their own sbrk() * calls. */ void zextendPool(MemPool *mp, void *base, uintptr_t bytes) { - if (mp->mp_Size == 0) { - mp->mp_Base = base; - mp->mp_Used = bytes; - mp->mp_End = (char *)base + bytes; - mp->mp_Size = bytes; - } else { - void *pend = (char *)mp->mp_Base + mp->mp_Size; + if (mp->mp_Size == 0) { + mp->mp_Base = base; + mp->mp_Used = bytes; + mp->mp_End = (char *)base + bytes; + mp->mp_Size = bytes; + } else { + void *pend = (char *)mp->mp_Base + mp->mp_Size; - if (base < mp->mp_Base) { - mp->mp_Size += (char *)mp->mp_Base - (char *)base; - mp->mp_Used += (char *)mp->mp_Base - (char *)base; - mp->mp_Base = base; + if (base < mp->mp_Base) { + mp->mp_Size += (char *)mp->mp_Base - (char *)base; + mp->mp_Used += (char *)mp->mp_Base - (char *)base; + mp->mp_Base = base; + } + base = (char *)base + bytes; + if (base > pend) { + mp->mp_Size += (char *)base - (char *)pend; + mp->mp_Used += (char *)base - (char *)pend; + mp->mp_End = (char *)base; + } } - base = (char *)base + bytes; - if (base > pend) { - mp->mp_Size += (char *)base - (char *)pend; - mp->mp_Used += (char *)base - (char *)pend; - mp->mp_End = (char *)base; - } - } } #ifdef ZALLOCDEBUG void zallocstats(MemPool *mp) { - int abytes = 0; - int hbytes = 0; - int fcount = 0; - MemNode *mn; + int abytes = 0; + int hbytes = 0; + int fcount = 0; + MemNode *mn; - printf("%d bytes reserved", (int) mp->mp_Size); + printf("%d bytes reserved", (int)mp->mp_Size); - mn = mp->mp_First; + mn = mp->mp_First; - if ((void *)mn != (void *)mp->mp_Base) { - abytes += (char *)mn - (char *)mp->mp_Base; - } + if ((void *)mn != (void *)mp->mp_Base) { + abytes += (char *)mn - (char *)mp->mp_Base; + } - while (mn) { - if ((char *)mn + mn->mr_Bytes != mp->mp_End) { - hbytes += mn->mr_Bytes; - ++fcount; + while (mn != NULL) { + if ((char *)mn + mn->mr_Bytes != mp->mp_End) { + hbytes += mn->mr_Bytes; + ++fcount; + } + if (mn->mr_Next != NULL) { + abytes += (char *)mn->mr_Next - + ((char *)mn + mn->mr_Bytes); + } + mn = mn->mr_Next; } - if (mn->mr_Next) - abytes += (char *)mn->mr_Next - ((char *)mn + mn->mr_Bytes); - mn = mn->mr_Next; - } - printf(" %d bytes allocated\n%d fragments (%d bytes fragmented)\n", - abytes, - fcount, - hbytes - ); + printf(" %d bytes allocated\n%d fragments (%d bytes fragmented)\n", + abytes, fcount, hbytes); } #endif - Index: projects/clang900-import/stand/libsa/zalloc_defs.h =================================================================== --- projects/clang900-import/stand/libsa/zalloc_defs.h (revision 352536) +++ projects/clang900-import/stand/libsa/zalloc_defs.h (revision 352537) @@ -1,78 +1,83 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This 
module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * DEFS.H */ -#define USEGUARD /* use stard/end guard bytes */ -#define USEENDGUARD -#define DMALLOCDEBUG /* add debugging code to gather stats */ -#define ZALLOCDEBUG +#ifndef _ZALLOC_DEFS_H +#define _ZALLOC_DEFS_H +#define USEGUARD /* use stard/end guard bytes */ +#define USEENDGUARD +#define DMALLOCDEBUG /* add debugging code to gather stats */ +#define ZALLOCDEBUG + #include #include "stand.h" #include "zalloc_mem.h" -#define Library extern +#define Library extern /* * block extension for sbrk() */ -#define BLKEXTEND (4 * 1024) -#define BLKEXTENDMASK (BLKEXTEND - 1) +#define BLKEXTEND (4 * 1024) +#define BLKEXTENDMASK (BLKEXTEND - 1) /* * Required malloc alignment. * * Embedded platforms using the u-boot API drivers require that all I/O buffers * be on a cache line sized boundary. The worst case size for that is 64 bytes. * For other platforms, 16 bytes works fine. The alignment also must be at * least sizeof(struct MemNode); this is asserted in zalloc.c. */ #if defined(__arm__) || defined(__mips__) || defined(__powerpc__) #define MALLOCALIGN 64 #else #define MALLOCALIGN 16 #endif #define MALLOCALIGN_MASK (MALLOCALIGN - 1) typedef struct Guard { - size_t ga_Bytes; - size_t ga_Magic; /* must be at least 32 bits */ + size_t ga_Bytes; + size_t ga_Magic; /* must be at least 32 bits */ } Guard; -#define GAMAGIC 0x55FF44FD -#define GAFREE 0x5F54F4DF +#define GAMAGIC 0x55FF44FD +#define GAFREE 0x5F54F4DF #include "zalloc_protos.h" + +#endif /* _ZALLOC_DEFS_H */ Index: projects/clang900-import/stand/libsa/zalloc_malloc.c =================================================================== --- projects/clang900-import/stand/libsa/zalloc_malloc.c (revision 352536) +++ projects/clang900-import/stand/libsa/zalloc_malloc.c (revision 352537) @@ -1,203 +1,223 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * MALLOC.C - malloc equivalent, runs on top of zalloc and uses sbrk */ #include "zalloc_defs.h" static MemPool MallocPool; #ifdef DMALLOCDEBUG static int MallocMax; static int MallocCount; void mallocstats(void); #endif #ifdef malloc #undef malloc #undef free #endif +static void *Malloc_align(size_t, size_t); + void * -Malloc(size_t bytes, const char *file, int line) +Malloc(size_t bytes, const char *file __unused, int line __unused) { - Guard *res; + return (Malloc_align(bytes, 1)); +} - if (bytes == 0) - return (NULL); +void * +Memalign(size_t alignment, size_t bytes, const char *file __unused, + int line __unused) +{ + if (alignment == 0) + alignment = 1; + return (Malloc_align(bytes, alignment)); +} + +static void * +Malloc_align(size_t bytes, size_t alignment) +{ + Guard *res; + #ifdef USEENDGUARD - bytes += MALLOCALIGN + 1; + bytes += MALLOCALIGN + 1; #else - bytes += MALLOCALIGN; + bytes += MALLOCALIGN; #endif - while ((res = znalloc(&MallocPool, bytes)) == NULL) { - int incr = (bytes + BLKEXTENDMASK) & ~BLKEXTENDMASK; - char *base; + while ((res = znalloc(&MallocPool, bytes, alignment)) == NULL) { + int incr = (bytes + BLKEXTENDMASK) & ~BLKEXTENDMASK; + char *base; - if ((base = sbrk(incr)) == (char *)-1) - return(NULL); - zextendPool(&MallocPool, base, incr); - zfree(&MallocPool, base, incr); - } + if ((base = sbrk(incr)) == (char *)-1) + return (NULL); + zextendPool(&MallocPool, base, incr); + zfree(&MallocPool, base, incr); + } #ifdef DMALLOCDEBUG - if (++MallocCount > MallocMax) - MallocMax = MallocCount; + if (++MallocCount > MallocMax) + MallocMax = MallocCount; #endif #ifdef USEGUARD - res->ga_Magic = GAMAGIC; + res->ga_Magic = GAMAGIC; #endif - res->ga_Bytes = bytes; + res->ga_Bytes = bytes; #ifdef USEENDGUARD - *((signed char *)res + bytes - 1) = -2; + *((signed char *)res + bytes - 1) = -2; #endif - return((char *)res + MALLOCALIGN); + return ((char *)res + MALLOCALIGN); } void Free(void *ptr, const char *file, int line) { - size_t bytes; + size_t bytes; - if (ptr != NULL) { - Guard *res = (void *)((char *)ptr - MALLOCALIGN); + if (ptr != NULL) { + Guard *res = (void *)((char *)ptr - MALLOCALIGN); - if (file == NULL) - file = "unknown"; + if (file == NULL) + file = "unknown"; #ifdef USEGUARD - if 
(res->ga_Magic == GAFREE) { - printf("free: duplicate free @ %p from %s:%d\n", ptr, file, line); - return; - } - if (res->ga_Magic != GAMAGIC) - panic("free: guard1 fail @ %p from %s:%d", ptr, file, line); - res->ga_Magic = GAFREE; + if (res->ga_Magic == GAFREE) { + printf("free: duplicate free @ %p from %s:%d\n", + ptr, file, line); + return; + } + if (res->ga_Magic != GAMAGIC) + panic("free: guard1 fail @ %p from %s:%d", + ptr, file, line); + res->ga_Magic = GAFREE; #endif #ifdef USEENDGUARD - if (*((signed char *)res + res->ga_Bytes - 1) == -1) { - printf("free: duplicate2 free @ %p from %s:%d\n", ptr, file, line); - return; - } - if (*((signed char *)res + res->ga_Bytes - 1) != -2) - panic("free: guard2 fail @ %p + %zu from %s:%d", ptr, res->ga_Bytes - MALLOCALIGN, file, line); - *((signed char *)res + res->ga_Bytes - 1) = -1; + if (*((signed char *)res + res->ga_Bytes - 1) == -1) { + printf("free: duplicate2 free @ %p from %s:%d\n", + ptr, file, line); + return; + } + if (*((signed char *)res + res->ga_Bytes - 1) != -2) + panic("free: guard2 fail @ %p + %zu from %s:%d", + ptr, res->ga_Bytes - MALLOCALIGN, file, line); + *((signed char *)res + res->ga_Bytes - 1) = -1; #endif - bytes = res->ga_Bytes; - zfree(&MallocPool, res, bytes); + bytes = res->ga_Bytes; + zfree(&MallocPool, res, bytes); #ifdef DMALLOCDEBUG - --MallocCount; + --MallocCount; #endif - } + } } void * Calloc(size_t n1, size_t n2, const char *file, int line) { - uintptr_t bytes = (uintptr_t)n1 * (uintptr_t)n2; - void *res; + uintptr_t bytes = (uintptr_t)n1 * (uintptr_t)n2; + void *res; - if ((res = Malloc(bytes, file, line)) != NULL) { - bzero(res, bytes); + if ((res = Malloc(bytes, file, line)) != NULL) { + bzero(res, bytes); #ifdef DMALLOCDEBUG - if (++MallocCount > MallocMax) - MallocMax = MallocCount; + if (++MallocCount > MallocMax) + MallocMax = MallocCount; #endif - } - return(res); + } + return (res); } /* * realloc() - I could be fancier here and free the old buffer before - * allocating the new one (saving potential fragmentation + * allocating the new one (saving potential fragmentation * and potential buffer copies). But I don't bother. 
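Reallocf() below follows the usual FreeBSD reallocf(3) contract: when the resize fails it frees the original pointer, so a caller may overwrite its only reference without leaking. A short usage sketch (the helper name is illustrative only):

#include "stand.h"

static int
example_grow(char **bufp, size_t newsize)
{
	char *p;

	p = reallocf(*bufp, newsize);	/* old buffer is already freed on failure */
	if (p == NULL)
		return (ENOMEM);
	*bufp = p;
	return (0);
}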
*/ void * Realloc(void *ptr, size_t size, const char *file, int line) { - void *res; - size_t old; + void *res; + size_t old; - if ((res = Malloc(size, file, line)) != NULL) { - if (ptr) { - old = *(size_t *)((char *)ptr - MALLOCALIGN) - MALLOCALIGN; - if (old < size) - bcopy(ptr, res, old); - else - bcopy(ptr, res, size); - Free(ptr, file, line); - } else { + if ((res = Malloc(size, file, line)) != NULL) { + if (ptr != NULL) { + Guard *g = (Guard *)((char *)ptr - MALLOCALIGN); + + old = g->ga_Bytes - MALLOCALIGN; + if (old < size) + bcopy(ptr, res, old); + else + bcopy(ptr, res, size); + Free(ptr, file, line); + } else { #ifdef DMALLOCDEBUG - if (++MallocCount > MallocMax) - MallocMax = MallocCount; + if (++MallocCount > MallocMax) + MallocMax = MallocCount; #ifdef EXITSTATS - if (DidAtExit == 0) { - DidAtExit = 1; - atexit(mallocstats); - } + if (DidAtExit == 0) { + DidAtExit = 1; + atexit(mallocstats); + } #endif #endif + } } - } - return(res); + return (res); } void * Reallocf(void *ptr, size_t size, const char *file, int line) { - void *res; + void *res; - if ((res = Realloc(ptr, size, file, line)) == NULL) - Free(ptr, file, line); - return(res); + if ((res = Realloc(ptr, size, file, line)) == NULL) + Free(ptr, file, line); + return (res); } #ifdef DMALLOCDEBUG void mallocstats(void) { - printf("Active Allocations: %d/%d\n", MallocCount, MallocMax); + printf("Active Allocations: %d/%d\n", MallocCount, MallocMax); #ifdef ZALLOCDEBUG - zallocstats(&MallocPool); + zallocstats(&MallocPool); #endif } #endif - Index: projects/clang900-import/stand/libsa/zalloc_mem.h =================================================================== --- projects/clang900-import/stand/libsa/zalloc_mem.h (revision 352536) +++ projects/clang900-import/stand/libsa/zalloc_mem.h (revision 352537) @@ -1,53 +1,56 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * H/MEM.H * * Basic memory pool / memory node structures. 
*/ +#ifndef _ZALLOC_MEM_H +#define _ZALLOC_MEM_H typedef struct MemNode { - struct MemNode *mr_Next; - uintptr_t mr_Bytes; + struct MemNode *mr_Next; + uintptr_t mr_Bytes; } MemNode; typedef struct MemPool { - void *mp_Base; - void *mp_End; - MemNode *mp_First; - uintptr_t mp_Size; - uintptr_t mp_Used; + void *mp_Base; + void *mp_End; + MemNode *mp_First; + uintptr_t mp_Size; + uintptr_t mp_Used; } MemPool; -#define ZNOTE_FREE 0 -#define ZNOTE_REUSE 1 +#define ZNOTE_FREE 0 +#define ZNOTE_REUSE 1 +#endif /* _ZALLOC_MEM_H */ Index: projects/clang900-import/stand/libsa/zalloc_protos.h =================================================================== --- projects/clang900-import/stand/libsa/zalloc_protos.h (revision 352536) +++ projects/clang900-import/stand/libsa/zalloc_protos.h (revision 352537) @@ -1,35 +1,40 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -Library void *znalloc(struct MemPool *mpool, uintptr_t bytes); +#ifndef _ZALLOC_PROTOS_H +#define _ZALLOC_PROTOS_H + +Library void *znalloc(struct MemPool *mpool, uintptr_t bytes, size_t align); Library void zfree(struct MemPool *mpool, void *ptr, uintptr_t bytes); Library void zextendPool(MemPool *mp, void *base, uintptr_t bytes); Library void zallocstats(struct MemPool *mp); + +#endif /* _ZALLOC_PROTOS_H */ Index: projects/clang900-import/stand/mips/uboot/Makefile =================================================================== --- projects/clang900-import/stand/mips/uboot/Makefile (revision 352536) +++ projects/clang900-import/stand/mips/uboot/Makefile (revision 352537) @@ -1,56 +1,60 @@ # $FreeBSD$ LOADER_CD9660_SUPPORT?= no LOADER_EXT2FS_SUPPORT?= no LOADER_MSDOS_SUPPORT?= yes LOADER_UFS_SUPPORT?= yes LOADER_NET_SUPPORT?= yes LOADER_NFS_SUPPORT?= yes LOADER_TFTP_SUPPORT?= no LOADER_GZIP_SUPPORT?= no LOADER_BZIP2_SUPPORT?= no .include FILES+= ubldr NEWVERSWHAT= "U-Boot loader" ${MACHINE_ARCH} INSTALLFLAGS= -b WARNS?= 1 # Address at which ubldr will be loaded. # This varies for different boards and SOCs. 
+.if ${MACHINE_ARCH:Mmips64*} UBLDR_LOADADDR?= 0xffffffff80800000 +.else +UBLDR_LOADADDR?= 0x80800000 +.endif # Architecture-specific loader code SRCS= start.S conf.c vers.c HELP_FILES= ${.CURDIR}/help.uboot ${BOOTSRC}/fdt/help.fdt # Always add MI sources .include "${BOOTSRC}/loader.mk" CFLAGS+= -g LDFLAGS= -nostdlib -static -T ${.CURDIR}/ldscript.${MACHINE_CPUARCH} .include "${BOOTSRC}/uboot.mk" DPADD= ${LDR_INTERP} ${LIBUBOOT} ${LIBFDT} ${LIBUBOOT_FDT} ${LIBSA} LDADD= ${LDR_INTERP} ${LIBUBOOT} ${LIBFDT} ${LIBUBOOT_FDT} ${LIBSA} OBJS+= ${SRCS:N*.h:R:S/$/.o/g} ldscript.abs: echo "UBLDR_LOADADDR = ${UBLDR_LOADADDR};" >${.TARGET} ldscript.pie: echo "UBLDR_LOADADDR = 0;" >${.TARGET} ubldr: ${OBJS} ldscript.abs ${.CURDIR}/ldscript.${MACHINE_CPUARCH} ${DPADD} ${CC} ${CFLAGS} -T ldscript.abs ${LDFLAGS} \ -o ${.TARGET} ${OBJS} ${LDADD} ${OBJCOPY} -S -O binary ubldr ubldr.bin CLEANFILES+= ldscript.abs ldscript.pie ubldr ubldr.pie ubldr.bin .include Index: projects/clang900-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c =================================================================== --- projects/clang900-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c (revision 352536) +++ projects/clang900-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c (revision 352537) @@ -1,3939 +1,3977 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2017 Joyent, Inc. * Copyright (c) 2017 Datto Inc. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #endif /* * Grand theory statement on scan queue sorting * * Scanning is implemented by recursively traversing all indirection levels * in an object and reading all blocks referenced from said objects. This * results in us approximately traversing the object from lowest logical * offset to the highest. For best performance, we would want the logical * blocks to be physically contiguous. However, this is frequently not the * case with pools given the allocation patterns of copy-on-write filesystems. * So instead, we put the I/Os into a reordering queue and issue them in a * way that will most benefit physical disks (LBA-order). * * Queue management: * * Ideally, we would want to scan all metadata and queue up all block I/O * prior to starting to issue it, because that allows us to do an optimal * sorting job. 
This can however consume large amounts of memory. Therefore * we continuously monitor the size of the queues and constrain them to 5% * (zfs_scan_mem_lim_fact) of physmem. If the queues grow larger than this * limit, we clear out a few of the largest extents at the head of the queues * to make room for more scanning. Hopefully, these extents will be fairly * large and contiguous, allowing us to approach sequential I/O throughput * even without a fully sorted tree. * * Metadata scanning takes place in dsl_scan_visit(), which is called from * dsl_scan_sync() every spa_sync(). If we have either fully scanned all * metadata on the pool, or we need to make room in memory because our * queues are too large, dsl_scan_visit() is postponed and * scan_io_queues_run() is called from dsl_scan_sync() instead. This implies * that metadata scanning and queued I/O issuing are mutually exclusive. This * allows us to provide maximum sequential I/O throughput for the majority of * I/O's issued since sequential I/O performance is significantly negatively * impacted if it is interleaved with random I/O. * * Implementation Notes * * One side effect of the queued scanning algorithm is that the scanning code * needs to be notified whenever a block is freed. This is needed to allow * the scanning code to remove these I/Os from the issuing queue. Additionally, * we do not attempt to queue gang blocks to be issued sequentially since this * is very hard to do and would have an extremely limitted performance benefit. * Instead, we simply issue gang I/Os as soon as we find them using the legacy * algorithm. * * Backwards compatibility * * This new algorithm is backwards compatible with the legacy on-disk data * structures (and therefore does not require a new feature flag). * Periodically during scanning (see zfs_scan_checkpoint_intval), the scan * will stop scanning metadata (in logical order) and wait for all outstanding * sorted I/O to complete. Once this is done, we write out a checkpoint * bookmark, indicating that we have scanned everything logically before it. * If the pool is imported on a machine without the new sorting algorithm, * the scan simply resumes from the last checkpoint using the legacy algorithm. */ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_phys_t *); static scan_cb_t dsl_scan_scrub_cb; static int scan_ds_queue_compare(const void *a, const void *b); static int scan_prefetch_queue_compare(const void *a, const void *b); static void scan_ds_queue_clear(dsl_scan_t *scn); static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, uint64_t *txg); static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); extern int zfs_vdev_async_write_active_min_dirty_percent; /* * By default zfs will check to ensure it is not over the hard memory * limit before each txg. If finer-grained control of this is needed * this value can be set to 1 to enable checking before scanning each * block. */ int zfs_scan_strict_mem_lim = B_FALSE; /* * Maximum number of parallelly executing I/Os per top-level vdev. * Tune with care. Very high settings (hundreds) are known to trigger * some firmware bugs and resets on certain SSDs. 
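 * dsl_scan_prefetch_thread() derives a per-pool cap from this tunable as
 * (number of top-level vdev children * zfs_top_maxinflight).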
*/ int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver -- 2 is a good number */ unsigned int zfs_scrub_delay = 4; /* number of ticks to delay scrub -- 4 is a good number */ unsigned int zfs_scan_idle = 50; /* idle window in clock ticks */ /* * Maximum number of parallelly executed bytes per leaf vdev. We attempt * to strike a balance here between keeping the vdev queues full of I/Os * at all times and not overflowing the queues to cause long latency, * which would cause long txg sync times. No matter what, we will not * overload the drives with I/O, since that is protected by * zfs_vdev_scrub_max_active. */ unsigned long zfs_scan_vdev_limit = 4 << 20; int zfs_scan_issue_strategy = 0; int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */ uint64_t zfs_scan_max_ext_gap = 2 << 20; /* in bytes */ unsigned int zfs_scan_checkpoint_intval = 7200; /* seconds */ #define ZFS_SCAN_CHECKPOINT_INTVAL SEC_TO_TICK(zfs_scan_checkpoint_intval) /* * fill_weight is non-tunable at runtime, so we copy it at module init from * zfs_scan_fill_weight. Runtime adjustments to zfs_scan_fill_weight would * break queue sorting. */ uint64_t zfs_scan_fill_weight = 3; static uint64_t fill_weight; /* See dsl_scan_should_clear() for details on the memory limit tunables */ uint64_t zfs_scan_mem_lim_min = 16 << 20; /* bytes */ uint64_t zfs_scan_mem_lim_soft_max = 128 << 20; /* bytes */ int zfs_scan_mem_lim_fact = 20; /* fraction of physmem */ int zfs_scan_mem_lim_soft_fact = 20; /* fraction of mem lim above */ unsigned int zfs_scrub_min_time_ms = 1000; /* min millisecs to scrub per txg */ unsigned int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ unsigned int zfs_obsolete_min_time_ms = 500; /* min millisecs to obsolete per txg */ unsigned int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */ boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */ SYSCTL_DECL(_vfs_zfs); SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN, &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_delay, CTLFLAG_RWTUN, &zfs_resilver_delay, 0, "Number of ticks to delay resilver"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, scrub_delay, CTLFLAG_RWTUN, &zfs_scrub_delay, 0, "Number of ticks to delay scrub"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, scan_idle, CTLFLAG_RWTUN, &zfs_scan_idle, 0, "Idle scan window in clock ticks"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, scan_min_time_ms, CTLFLAG_RWTUN, &zfs_scrub_min_time_ms, 0, "Min millisecs to scrub per txg"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, free_min_time_ms, CTLFLAG_RWTUN, &zfs_free_min_time_ms, 0, "Min millisecs to free per txg"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_min_time_ms, CTLFLAG_RWTUN, &zfs_resilver_min_time_ms, 0, "Min millisecs to resilver per txg"); SYSCTL_INT(_vfs_zfs, OID_AUTO, no_scrub_io, CTLFLAG_RWTUN, &zfs_no_scrub_io, 0, "Disable scrub I/O"); SYSCTL_INT(_vfs_zfs, OID_AUTO, no_scrub_prefetch, CTLFLAG_RWTUN, &zfs_no_scrub_prefetch, 0, "Disable scrub prefetching"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, zfs_scan_legacy, CTLFLAG_RWTUN, &zfs_scan_legacy, 0, "Scrub using legacy non-sequential method"); SYSCTL_UINT(_vfs_zfs, OID_AUTO, zfs_scan_checkpoint_interval, CTLFLAG_RWTUN, &zfs_scan_checkpoint_intval, 0, "Scan progress on-disk checkpointing interval"); enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE; /* max 
number of blocks to free in a single TXG */ uint64_t zfs_async_block_max_blocks = UINT64_MAX; SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, free_max_blocks, CTLFLAG_RWTUN, &zfs_async_block_max_blocks, 0, "Maximum number of blocks to free in one TXG"); /* * We wait a few txgs after importing a pool to begin scanning so that * the import / mounting code isn't held up by scrub / resilver IO. * Unfortunately, it is a bit difficult to determine exactly how long * this will take since userspace will trigger fs mounts asynchronously * and the kernel will create zvol minors asynchronously. As a result, * the value provided here is a bit arbitrary, but represents a * reasonable estimate of how many txgs it will take to finish fully * importing a pool */ #define SCAN_IMPORT_WAIT_TXGS 5 #define DSL_SCAN_IS_SCRUB_RESILVER(scn) \ ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ (scn)->scn_phys.scn_func == POOL_SCAN_RESILVER) extern int zfs_txg_timeout; /* * Enable/disable the processing of the free_bpobj object. */ boolean_t zfs_free_bpobj_enabled = B_TRUE; SYSCTL_INT(_vfs_zfs, OID_AUTO, free_bpobj_enabled, CTLFLAG_RWTUN, &zfs_free_bpobj_enabled, 0, "Enable free_bpobj processing"); /* the order has to match pool_scan_type */ static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = { NULL, dsl_scan_scrub_cb, /* POOL_SCAN_SCRUB */ dsl_scan_scrub_cb, /* POOL_SCAN_RESILVER */ }; /* In core node for the scn->scn_queue. Represents a dataset to be scanned */ typedef struct { uint64_t sds_dsobj; uint64_t sds_txg; avl_node_t sds_node; } scan_ds_t; /* * This controls what conditions are placed on dsl_scan_sync_state(): * SYNC_OPTIONAL) write out scn_phys iff scn_bytes_pending == 0 * SYNC_MANDATORY) write out scn_phys always. scn_bytes_pending must be 0. * SYNC_CACHED) if scn_bytes_pending == 0, write out scn_phys. Otherwise * write out the scn_phys_cached version. * See dsl_scan_sync_state for details. */ typedef enum { SYNC_OPTIONAL, SYNC_MANDATORY, SYNC_CACHED } state_sync_type_t; /* * This struct represents the minimum information needed to reconstruct a * zio for sequential scanning. This is useful because many of these will * accumulate in the sequential IO queues before being issued, so saving * memory matters here. */ typedef struct scan_io { /* fields from blkptr_t */ uint64_t sio_offset; uint64_t sio_blk_prop; uint64_t sio_phys_birth; uint64_t sio_birth; zio_cksum_t sio_cksum; uint32_t sio_asize; /* fields from zio_t */ int sio_flags; zbookmark_phys_t sio_zb; /* members for queue sorting */ union { avl_node_t sio_addr_node; /* link into issueing queue */ list_node_t sio_list_node; /* link for issuing to disk */ } sio_nodes; } scan_io_t; struct dsl_scan_io_queue { dsl_scan_t *q_scn; /* associated dsl_scan_t */ vdev_t *q_vd; /* top-level vdev that this queue represents */ /* trees used for sorting I/Os and extents of I/Os */ range_tree_t *q_exts_by_addr; avl_tree_t q_exts_by_size; avl_tree_t q_sios_by_addr; /* members for zio rate limiting */ uint64_t q_maxinflight_bytes; uint64_t q_inflight_bytes; kcondvar_t q_zio_cv; /* used under vd->vdev_scan_io_queue_lock */ /* per txg statistics */ uint64_t q_total_seg_size_this_txg; uint64_t q_segs_this_txg; uint64_t q_total_zio_size_this_txg; uint64_t q_zios_this_txg; }; /* private data for dsl_scan_prefetch_cb() */ typedef struct scan_prefetch_ctx { refcount_t spc_refcnt; /* refcount for memory management */ dsl_scan_t *spc_scn; /* dsl_scan_t for the pool */ boolean_t spc_root; /* is this prefetch for an objset? 
*/ uint8_t spc_indblkshift; /* dn_indblkshift of current dnode */ uint16_t spc_datablkszsec; /* dn_idatablkszsec of current dnode */ } scan_prefetch_ctx_t; /* private data for dsl_scan_prefetch() */ typedef struct scan_prefetch_issue_ctx { avl_node_t spic_avl_node; /* link into scn->scn_prefetch_queue */ scan_prefetch_ctx_t *spic_spc; /* spc for the callback */ blkptr_t spic_bp; /* bp to prefetch */ zbookmark_phys_t spic_zb; /* bookmark to prefetch */ } scan_prefetch_issue_ctx_t; static void scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, const zbookmark_phys_t *zb, dsl_scan_io_queue_t *queue); static void scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio); static dsl_scan_io_queue_t *scan_io_queue_create(vdev_t *vd); static void scan_io_queues_destroy(dsl_scan_t *scn); static kmem_cache_t *sio_cache; void scan_init(void) { /* * This is used in ext_size_compare() to weight segments * based on how sparse they are. This cannot be changed * mid-scan and the tree comparison functions don't currently * have a mechansim for passing additional context to the * compare functions. Thus we store this value globally and * we only allow it to be set at module intiailization time */ fill_weight = zfs_scan_fill_weight; sio_cache = kmem_cache_create("sio_cache", sizeof (scan_io_t), 0, NULL, NULL, NULL, NULL, NULL, 0); } void scan_fini(void) { kmem_cache_destroy(sio_cache); } static inline boolean_t dsl_scan_is_running(const dsl_scan_t *scn) { return (scn->scn_phys.scn_state == DSS_SCANNING); } boolean_t dsl_scan_resilvering(dsl_pool_t *dp) { return (dsl_scan_is_running(dp->dp_scan) && dp->dp_scan->scn_phys.scn_func == POOL_SCAN_RESILVER); } static inline void sio2bp(const scan_io_t *sio, blkptr_t *bp, uint64_t vdev_id) { bzero(bp, sizeof (*bp)); DVA_SET_ASIZE(&bp->blk_dva[0], sio->sio_asize); DVA_SET_VDEV(&bp->blk_dva[0], vdev_id); DVA_SET_OFFSET(&bp->blk_dva[0], sio->sio_offset); bp->blk_prop = sio->sio_blk_prop; bp->blk_phys_birth = sio->sio_phys_birth; bp->blk_birth = sio->sio_birth; bp->blk_fill = 1; /* we always only work with data pointers */ bp->blk_cksum = sio->sio_cksum; } static inline void bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i) { /* we discard the vdev id, since we can deduce it from the queue */ sio->sio_offset = DVA_GET_OFFSET(&bp->blk_dva[dva_i]); sio->sio_asize = DVA_GET_ASIZE(&bp->blk_dva[dva_i]); sio->sio_blk_prop = bp->blk_prop; sio->sio_phys_birth = bp->blk_phys_birth; sio->sio_birth = bp->blk_birth; sio->sio_cksum = bp->blk_cksum; } void dsl_scan_global_init(void) { /* * This is used in ext_size_compare() to weight segments * based on how sparse they are. This cannot be changed * mid-scan and the tree comparison functions don't currently * have a mechansim for passing additional context to the * compare functions. Thus we store this value globally and * we only allow it to be set at module intiailization time */ fill_weight = zfs_scan_fill_weight; } int dsl_scan_init(dsl_pool_t *dp, uint64_t txg) { int err; dsl_scan_t *scn; spa_t *spa = dp->dp_spa; uint64_t f; scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP); scn->scn_dp = dp; /* * It's possible that we're resuming a scan after a reboot so * make sure that the scan_async_destroying flag is initialized * appropriately. 
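 * (It is set just below from whether SPA_FEATURE_ASYNC_DESTROY is currently
 * active on the pool.)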
*/ ASSERT(!scn->scn_async_destroying); scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY); bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys)); avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), offsetof(scan_ds_t, sds_node)); avl_create(&scn->scn_prefetch_queue, scan_prefetch_queue_compare, sizeof (scan_prefetch_issue_ctx_t), offsetof(scan_prefetch_issue_ctx_t, spic_avl_node)); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, "scrub_func", sizeof (uint64_t), 1, &f); if (err == 0) { /* * There was an old-style scrub in progress. Restart a * new-style scrub from the beginning. */ scn->scn_restart_txg = txg; zfs_dbgmsg("old-style scrub was in progress; " "restarting new-style scrub in txg %llu", (longlong_t)scn->scn_restart_txg); /* * Load the queue obj from the old location so that it * can be freed by dsl_scan_done(). */ (void) zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, "scrub_queue", sizeof (uint64_t), 1, &scn->scn_phys.scn_queue_obj); } else { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys); if (err == ENOENT) return (0); else if (err) return (err); /* * We might be restarting after a reboot, so jump the issued * counter to how far we've scanned. We know we're consistent * up to here. */ scn->scn_issued_before_pass = scn->scn_phys.scn_examined; if (dsl_scan_is_running(scn) && spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) { /* * A new-type scrub was in progress on an old * pool, and the pool was accessed by old * software. Restart from the beginning, since * the old software may have changed the pool in * the meantime. */ scn->scn_restart_txg = txg; zfs_dbgmsg("new-style scrub was modified " "by old software; restarting in txg %llu", (longlong_t)scn->scn_restart_txg); } } /* reload the queue into the in-core state */ if (scn->scn_phys.scn_queue_obj != 0) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, scn->scn_phys.scn_queue_obj); zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { scan_ds_queue_insert(scn, zfs_strtonum(za.za_name, NULL), za.za_first_integer); } zap_cursor_fini(&zc); } spa_scan_stat_init(spa); return (0); } void dsl_scan_fini(dsl_pool_t *dp) { if (dp->dp_scan != NULL) { dsl_scan_t *scn = dp->dp_scan; if (scn->scn_taskq != NULL) taskq_destroy(scn->scn_taskq); scan_ds_queue_clear(scn); avl_destroy(&scn->scn_queue); avl_destroy(&scn->scn_prefetch_queue); kmem_free(dp->dp_scan, sizeof (dsl_scan_t)); dp->dp_scan = NULL; } } static boolean_t dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx) { return (scn->scn_restart_txg != 0 && scn->scn_restart_txg <= tx->tx_txg); } boolean_t dsl_scan_scrubbing(const dsl_pool_t *dp) { dsl_scan_phys_t *scn_phys = &dp->dp_scan->scn_phys; return (scn_phys->scn_state == DSS_SCANNING && scn_phys->scn_func == POOL_SCAN_SCRUB); } boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn) { return (dsl_scan_scrubbing(scn->scn_dp) && scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED); } /* * Writes out a persistent dsl_scan_phys_t record to the pool directory. * Because we can be running in the block sorting algorithm, we do not always * want to write out the record, only when it is "safe" to do so. This safety * condition is achieved by making sure that the sorting queues are empty * (scn_bytes_pending == 0). 
When this condition is not true, the sync'd state * is inconsistent with how much actual scanning progress has been made. The * kind of sync to be performed is specified by the sync_type argument. If the * sync is optional, we only sync if the queues are empty. If the sync is * mandatory, we do a hard ASSERT to make sure that the queues are empty. The * third possible state is a "cached" sync. This is done in response to: * 1) The dataset that was in the last sync'd dsl_scan_phys_t having been * destroyed, so we wouldn't be able to restart scanning from it. * 2) The snapshot that was in the last sync'd dsl_scan_phys_t having been * superseded by a newer snapshot. * 3) The dataset that was in the last sync'd dsl_scan_phys_t having been * swapped with its clone. * In all cases, a cached sync simply rewrites the last record we've written, * just slightly modified. For the modifications that are performed to the * last written dsl_scan_phys_t, see dsl_scan_ds_destroyed, * dsl_scan_ds_snapshotted and dsl_scan_ds_clone_swapped. */ static void dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx, state_sync_type_t sync_type) { int i; spa_t *spa = scn->scn_dp->dp_spa; ASSERT(sync_type != SYNC_MANDATORY || scn->scn_bytes_pending == 0); if (scn->scn_bytes_pending == 0) { for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) { vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; dsl_scan_io_queue_t *q = vd->vdev_scan_io_queue; if (q == NULL) continue; mutex_enter(&vd->vdev_scan_io_queue_lock); ASSERT3P(avl_first(&q->q_sios_by_addr), ==, NULL); ASSERT3P(avl_first(&q->q_exts_by_size), ==, NULL); ASSERT3P(range_tree_first(q->q_exts_by_addr), ==, NULL); mutex_exit(&vd->vdev_scan_io_queue_lock); } if (scn->scn_phys.scn_queue_obj != 0) scan_ds_queue_sync(scn, tx); VERIFY0(zap_update(scn->scn_dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys, tx)); bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys)); if (scn->scn_checkpointing) zfs_dbgmsg("finish scan checkpoint"); scn->scn_checkpointing = B_FALSE; scn->scn_last_checkpoint = ddi_get_lbolt(); } else if (sync_type == SYNC_CACHED) { VERIFY0(zap_update(scn->scn_dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys_cached, tx)); } } /* ARGSUSED */ static int dsl_scan_setup_check(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (dsl_scan_is_running(scn)) return (SET_ERROR(EBUSY)); return (0); } static void dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; pool_scan_func_t *funcp = arg; dmu_object_type_t ot = 0; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; ASSERT(!dsl_scan_is_running(scn)); ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); bzero(&scn->scn_phys, sizeof (scn->scn_phys)); scn->scn_phys.scn_func = *funcp; scn->scn_phys.scn_state = DSS_SCANNING; scn->scn_phys.scn_min_txg = 0; scn->scn_phys.scn_max_txg = tx->tx_txg; scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */ scn->scn_phys.scn_start_time = gethrestime_sec(); scn->scn_phys.scn_errors = 0; scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc; scn->scn_issued_before_pass = 0; scn->scn_restart_txg = 0; scn->scn_done_txg = 0; scn->scn_last_checkpoint = 0; scn->scn_checkpointing = B_FALSE; spa_scan_stat_init(spa); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; /* rewrite all disk labels */ 
vdev_config_dirty(spa->spa_root_vdev); if (vdev_resilver_needed(spa->spa_root_vdev, &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { spa_event_notify(spa, NULL, NULL, ESC_ZFS_RESILVER_START); } else { spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_START); } spa->spa_scrub_started = B_TRUE; /* * If this is an incremental scrub, limit the DDT scrub phase * to just the auto-ditto class (for correctness); the rest * of the scrub should go faster using top-down pruning. */ if (scn->scn_phys.scn_min_txg > TXG_INITIAL) scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO; } /* back to the generic stuff */ if (dp->dp_blkstats == NULL) { dp->dp_blkstats = kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP); mutex_init(&dp->dp_blkstats->zab_lock, NULL, MUTEX_DEFAULT, NULL); } bzero(&dp->dp_blkstats->zab_type, sizeof (dp->dp_blkstats->zab_type)); if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) ot = DMU_OT_ZAP_OTHER; scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset, ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx); bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys)); dsl_scan_sync_state(scn, tx, SYNC_MANDATORY); spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); } /* * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver. * Can also be called to resume a paused scrub. */ int dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) { spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; /* * Purge all vdev caches and probe all devices. We do this here * rather than in sync context because this requires a writer lock * on the spa_config lock, which we can't do from sync context. The * spa_scrub_reopen flag indicates that vdev_open() should not * attempt to start another scrub. */ spa_vdev_state_enter(spa, SCL_NONE); spa->spa_scrub_reopen = B_TRUE; vdev_reopen(spa->spa_root_vdev); spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) { /* got scrub start cmd, resume paused scrub */ int err = dsl_scrub_set_pause_resume(scn->scn_dp, POOL_SCRUB_NORMAL); if (err == 0) { spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_RESUME); return (ECANCELED); } return (SET_ERROR(err)); } return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } /* ARGSUSED */ static void dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) { static const char *old_names[] = { "scrub_bookmark", "scrub_ddt_bookmark", "scrub_ddt_class_max", "scrub_queue", "scrub_min_txg", "scrub_max_txg", "scrub_func", "scrub_errors", NULL }; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; int i; /* Remove any remnants of an old-style scrub. */ for (i = 0; old_names[i]; i++) { (void) zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, old_names[i], tx); } if (scn->scn_phys.scn_queue_obj != 0) { VERIFY0(dmu_object_free(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, tx)); scn->scn_phys.scn_queue_obj = 0; } scan_ds_queue_clear(scn); scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; /* * If we were "restarted" from a stopped state, don't bother * with anything else. */ if (!dsl_scan_is_running(scn)) { ASSERT(!scn->scn_is_sorted); return; } if (scn->scn_is_sorted) { scan_io_queues_destroy(scn); scn->scn_is_sorted = B_FALSE; if (scn->scn_taskq != NULL) { taskq_destroy(scn->scn_taskq); scn->scn_taskq = NULL; } } scn->scn_phys.scn_state = complete ? 
DSS_FINISHED : DSS_CANCELED; if (dsl_scan_restarting(scn, tx)) spa_history_log_internal(spa, "scan aborted, restarting", tx, "errors=%llu", spa_get_errlog_size(spa)); else if (!complete) spa_history_log_internal(spa, "scan cancelled", tx, "errors=%llu", spa_get_errlog_size(spa)); else spa_history_log_internal(spa, "scan done", tx, "errors=%llu", spa_get_errlog_size(spa)); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { spa->spa_scrub_started = B_FALSE; spa->spa_scrub_active = B_FALSE; /* * If the scrub/resilver completed, update all DTLs to * reflect this. Whether it succeeded or not, vacate * all temporary scrub DTLs. * * As the scrub does not currently support traversing * data that have been freed but are part of a checkpoint, * we don't mark the scrub as done in the DTLs as faults * may still exist in those vdevs. */ if (complete && !spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg, scn->scn_phys.scn_max_txg, B_TRUE); spa_event_notify(spa, NULL, NULL, scn->scn_phys.scn_min_txg ? ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); } else { vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg, 0, B_TRUE); } spa_errlog_rotate(spa); /* * We may have finished replacing a device. * Let the async thread assess this and handle the detach. */ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); } scn->scn_phys.scn_end_time = gethrestime_sec(); ASSERT(!dsl_scan_is_running(scn)); } /* ARGSUSED */ static int dsl_scan_cancel_check(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (!dsl_scan_is_running(scn)) return (SET_ERROR(ENOENT)); return (0); } /* ARGSUSED */ static void dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; dsl_scan_done(scn, B_FALSE, tx); dsl_scan_sync_state(scn, tx, SYNC_MANDATORY); spa_event_notify(scn->scn_dp->dp_spa, NULL, NULL, ESC_ZFS_SCRUB_ABORT); } int dsl_scan_cancel(dsl_pool_t *dp) { return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check, dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED)); } static int dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx) { pool_scrub_cmd_t *cmd = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_scan_t *scn = dp->dp_scan; if (*cmd == POOL_SCRUB_PAUSE) { /* can't pause a scrub when there is no in-progress scrub */ if (!dsl_scan_scrubbing(dp)) return (SET_ERROR(ENOENT)); /* can't pause a paused scrub */ if (dsl_scan_is_paused_scrub(scn)) return (SET_ERROR(EBUSY)); } else if (*cmd != POOL_SCRUB_NORMAL) { return (SET_ERROR(ENOTSUP)); } return (0); } static void dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx) { pool_scrub_cmd_t *cmd = arg; dsl_pool_t *dp = dmu_tx_pool(tx); spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; if (*cmd == POOL_SCRUB_PAUSE) { /* can't pause a scrub when there is no in-progress scrub */ spa->spa_scan_pass_scrub_pause = gethrestime_sec(); scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED; dsl_scan_sync_state(scn, tx, SYNC_CACHED); spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_PAUSED); } else { ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL); if (dsl_scan_is_paused_scrub(scn)) { /* * We need to keep track of how much time we spend * paused per pass so that we can adjust the scrub rate * shown in the output of 'zpool status' */ spa->spa_scan_pass_scrub_spent_paused += gethrestime_sec() - spa->spa_scan_pass_scrub_pause; spa->spa_scan_pass_scrub_pause = 0; scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; dsl_scan_sync_state(scn, tx, SYNC_CACHED); } } } /* * Set scrub pause/resume state if it makes sense to do so 
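 * (dsl_scrub_pause_resume_check() rejects pausing when no scrub is in
 * progress (ENOENT), pausing an already-paused scrub (EBUSY), and any
 * command other than pause or resume (ENOTSUP).)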
*/ int dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) { return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3, ZFS_SPACE_CHECK_RESERVED)); } /* start a new scan, or restart an existing one. */ void dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg) { if (txg == 0) { dmu_tx_t *tx; tx = dmu_tx_create_dd(dp->dp_mos_dir); VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); txg = dmu_tx_get_txg(tx); dp->dp_scan->scn_restart_txg = txg; dmu_tx_commit(tx); } else { dp->dp_scan->scn_restart_txg = txg; } zfs_dbgmsg("restarting resilver txg=%llu", txg); } void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp) { zio_free(dp->dp_spa, txg, bp); } void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp) { ASSERT(dsl_pool_sync_context(dp)); zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, BP_GET_PSIZE(bpp), pio->io_flags)); } static int scan_ds_queue_compare(const void *a, const void *b) { const scan_ds_t *sds_a = a, *sds_b = b; if (sds_a->sds_dsobj < sds_b->sds_dsobj) return (-1); if (sds_a->sds_dsobj == sds_b->sds_dsobj) return (0); return (1); } static void scan_ds_queue_clear(dsl_scan_t *scn) { void *cookie = NULL; scan_ds_t *sds; while ((sds = avl_destroy_nodes(&scn->scn_queue, &cookie)) != NULL) { kmem_free(sds, sizeof (*sds)); } } static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, uint64_t *txg) { scan_ds_t srch, *sds; srch.sds_dsobj = dsobj; sds = avl_find(&scn->scn_queue, &srch, NULL); if (sds != NULL && txg != NULL) *txg = sds->sds_txg; return (sds != NULL); } static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg) { scan_ds_t *sds; avl_index_t where; sds = kmem_zalloc(sizeof (*sds), KM_SLEEP); sds->sds_dsobj = dsobj; sds->sds_txg = txg; VERIFY3P(avl_find(&scn->scn_queue, sds, &where), ==, NULL); avl_insert(&scn->scn_queue, sds, where); } static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj) { scan_ds_t srch, *sds; srch.sds_dsobj = dsobj; sds = avl_find(&scn->scn_queue, &srch, NULL); VERIFY(sds != NULL); avl_remove(&scn->scn_queue, sds); kmem_free(sds, sizeof (*sds)); } static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; dmu_object_type_t ot = (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) ? DMU_OT_SCAN_QUEUE : DMU_OT_ZAP_OTHER; ASSERT0(scn->scn_bytes_pending); ASSERT(scn->scn_phys.scn_queue_obj != 0); VERIFY0(dmu_object_free(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, tx)); scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset, ot, DMU_OT_NONE, 0, tx); for (scan_ds_t *sds = avl_first(&scn->scn_queue); sds != NULL; sds = AVL_NEXT(&scn->scn_queue, sds)) { VERIFY0(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, sds->sds_dsobj, sds->sds_txg, tx)); } } /* * Computes the memory limit state that we're currently in. A sorted scan * needs quite a bit of memory to hold the sorting queue, so we need to * reasonably constrain the size so it doesn't impact overall system * performance. We compute two limits: * 1) Hard memory limit: if the amount of memory used by the sorting * queues on a pool gets above this value, we stop the metadata * scanning portion and start issuing the queued up and sorted * I/Os to reduce memory usage. * This limit is calculated as a fraction of physmem (by default 5%). * We constrain the lower bound of the hard limit to an absolute * minimum of zfs_scan_mem_lim_min (default: 16 MiB). 
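 * (Purely illustrative: with 8 GiB of physmem and the default
 * zfs_scan_mem_lim_fact of 20, the hard limit works out to about
 * 8 GiB / 20 = ~410 MiB, well above the 16 MiB floor.)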
We also constrain * the upper bound to 5% of the total pool size - no chance we'll * ever need that much memory, but just to keep the value in check. * 2) Soft memory limit: once we hit the hard memory limit, we start * issuing I/O to reduce queue memory usage, but we don't want to * completely empty out the queues, since we might be able to find I/Os * that will fill in the gaps of our non-sequential IOs at some point * in the future. So we stop the issuing of I/Os once the amount of * memory used drops below the soft limit (at which point we stop issuing * I/O and start scanning metadata again). * * This limit is calculated by subtracting a fraction of the hard * limit from the hard limit. By default this fraction is 5%, so * the soft limit is 95% of the hard limit. We cap the size of the * difference between the hard and soft limits at an absolute * maximum of zfs_scan_mem_lim_soft_max (default: 128 MiB) - this is * sufficient to not cause too frequent switching between the * metadata scan and I/O issue (even at 2k recordsize, 128 MiB's * worth of queues is about 1.2 GiB of on-pool data, so scanning * that should take at least a decent fraction of a second). */ static boolean_t dsl_scan_should_clear(dsl_scan_t *scn) { vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev; uint64_t mlim_hard, mlim_soft, mused; uint64_t alloc = metaslab_class_get_alloc(spa_normal_class( scn->scn_dp->dp_spa)); mlim_hard = MAX((physmem / zfs_scan_mem_lim_fact) * PAGESIZE, zfs_scan_mem_lim_min); mlim_hard = MIN(mlim_hard, alloc / 20); mlim_soft = mlim_hard - MIN(mlim_hard / zfs_scan_mem_lim_soft_fact, zfs_scan_mem_lim_soft_max); mused = 0; for (uint64_t i = 0; i < rvd->vdev_children; i++) { vdev_t *tvd = rvd->vdev_child[i]; dsl_scan_io_queue_t *queue; mutex_enter(&tvd->vdev_scan_io_queue_lock); queue = tvd->vdev_scan_io_queue; if (queue != NULL) { /* #extents in exts_by_size = # in exts_by_addr */ mused += avl_numnodes(&queue->q_exts_by_size) * sizeof (range_seg_t) + avl_numnodes(&queue->q_sios_by_addr) * sizeof (scan_io_t); } mutex_exit(&tvd->vdev_scan_io_queue_lock); } dprintf("current scan memory usage: %llu bytes\n", (longlong_t)mused); if (mused == 0) ASSERT0(scn->scn_bytes_pending); /* * If we are above our hard limit, we need to clear out memory. * If we are below our soft limit, we need to accumulate sequential IOs. * Otherwise, we should keep doing whatever we are currently doing. */ if (mused >= mlim_hard) return (B_TRUE); else if (mused < mlim_soft) return (B_FALSE); else return (scn->scn_clearing); } static boolean_t dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) { /* we never skip user/group accounting objects */ if (zb && (int64_t)zb->zb_object < 0) return (B_FALSE); if (scn->scn_suspending) return (B_TRUE); /* we're already suspending */ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) return (B_FALSE); /* we're resuming */ /* We only know how to resume from level-0 blocks. */ if (zb && zb->zb_level != 0) return (B_FALSE); /* * We suspend if: * - we have scanned for at least the minimum time (default 1 sec * for scrub, 3 sec for resilver), and either we have sufficient * dirty data that we are starting to write more quickly * (default 30%), or someone is explicitly waiting for this txg * to complete. * or * - the spa is shutting down because this pool is being exported * or the machine is rebooting. 
* or * - the scan queue has reached its memory use limit */ uint64_t elapsed_nanosecs = gethrtime(); uint64_t curr_time_ns = gethrtime(); uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time; uint64_t sync_time_ns = curr_time_ns - scn->scn_dp->dp_spa->spa_sync_starttime; int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max; int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms : zfs_scrub_min_time_ms; if ((NSEC2MSEC(scan_time_ns) > mintime && (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent || txg_sync_waiting(scn->scn_dp) || NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) || spa_shutting_down(scn->scn_dp->dp_spa) || (zfs_scan_strict_mem_lim && dsl_scan_should_clear(scn))) { if (zb) { dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, (longlong_t)zb->zb_level, (longlong_t)zb->zb_blkid); scn->scn_phys.scn_bookmark = *zb; } else { dsl_scan_phys_t *scnp = &scn->scn_phys; dprintf("suspending at at DDT bookmark " "%llx/%llx/%llx/%llx\n", (longlong_t)scnp->scn_ddt_bookmark.ddb_class, (longlong_t)scnp->scn_ddt_bookmark.ddb_type, (longlong_t)scnp->scn_ddt_bookmark.ddb_checksum, (longlong_t)scnp->scn_ddt_bookmark.ddb_cursor); } scn->scn_suspending = B_TRUE; return (B_TRUE); } return (B_FALSE); } typedef struct zil_scan_arg { dsl_pool_t *zsa_dp; zil_header_t *zsa_zh; } zil_scan_arg_t; /* ARGSUSED */ static int dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) { zil_scan_arg_t *zsa = arg; dsl_pool_t *dp = zsa->zsa_dp; dsl_scan_t *scn = dp->dp_scan; zil_header_t *zh = zsa->zsa_zh; zbookmark_phys_t zb; if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) return (0); /* * One block ("stubby") can be allocated a long time ago; we * want to visit that one because it has been allocated * (on-disk) even if it hasn't been claimed (even though for * scrub there's nothing to do to it). */ if (claim_txg == 0 && bp->blk_birth >= spa_min_claim_txg(dp->dp_spa)) return (0); SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); return (0); } /* ARGSUSED */ static int dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) { if (lrc->lrc_txtype == TX_WRITE) { zil_scan_arg_t *zsa = arg; dsl_pool_t *dp = zsa->zsa_dp; dsl_scan_t *scn = dp->dp_scan; zil_header_t *zh = zsa->zsa_zh; lr_write_t *lr = (lr_write_t *)lrc; blkptr_t *bp = &lr->lr_blkptr; zbookmark_phys_t zb; if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) return (0); /* * birth can be < claim_txg if this record's txg is * already txg sync'ed (but this log block contains * other records that are not synced) */ if (claim_txg == 0 || bp->blk_birth < claim_txg) return (0); SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); } return (0); } static void dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh) { uint64_t claim_txg = zh->zh_claim_txg; zil_scan_arg_t zsa = { dp, zh }; zilog_t *zilog; ASSERT(spa_writeable(dp->dp_spa)); /* * We only want to visit blocks that have been claimed * but not yet replayed. 
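 * (A claim_txg of zero means this ZIL was never claimed, so there is
 * nothing for the scan to visit and we return immediately.)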
*/ if (claim_txg == 0) return; zilog = zil_alloc(dp->dp_meta_objset, zh); (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa, claim_txg); zil_free(zilog); } /* * We compare scan_prefetch_issue_ctx_t's based on their bookmarks. The idea * here is to sort the AVL tree by the order each block will be needed. */ static int scan_prefetch_queue_compare(const void *a, const void *b) { const scan_prefetch_issue_ctx_t *spic_a = a, *spic_b = b; const scan_prefetch_ctx_t *spc_a = spic_a->spic_spc; const scan_prefetch_ctx_t *spc_b = spic_b->spic_spc; return (zbookmark_compare(spc_a->spc_datablkszsec, spc_a->spc_indblkshift, spc_b->spc_datablkszsec, spc_b->spc_indblkshift, &spic_a->spic_zb, &spic_b->spic_zb)); } static void scan_prefetch_ctx_rele(scan_prefetch_ctx_t *spc, void *tag) { if (refcount_remove(&spc->spc_refcnt, tag) == 0) { refcount_destroy(&spc->spc_refcnt); kmem_free(spc, sizeof (scan_prefetch_ctx_t)); } } static scan_prefetch_ctx_t * scan_prefetch_ctx_create(dsl_scan_t *scn, dnode_phys_t *dnp, void *tag) { scan_prefetch_ctx_t *spc; spc = kmem_alloc(sizeof (scan_prefetch_ctx_t), KM_SLEEP); refcount_create(&spc->spc_refcnt); refcount_add(&spc->spc_refcnt, tag); spc->spc_scn = scn; if (dnp != NULL) { spc->spc_datablkszsec = dnp->dn_datablkszsec; spc->spc_indblkshift = dnp->dn_indblkshift; spc->spc_root = B_FALSE; } else { spc->spc_datablkszsec = 0; spc->spc_indblkshift = 0; spc->spc_root = B_TRUE; } return (spc); } static void scan_prefetch_ctx_add_ref(scan_prefetch_ctx_t *spc, void *tag) { refcount_add(&spc->spc_refcnt, tag); } static boolean_t dsl_scan_check_prefetch_resume(scan_prefetch_ctx_t *spc, const zbookmark_phys_t *zb) { zbookmark_phys_t *last_zb = &spc->spc_scn->scn_prefetch_bookmark; dnode_phys_t tmp_dnp; dnode_phys_t *dnp = (spc->spc_root) ? NULL : &tmp_dnp; if (zb->zb_objset != last_zb->zb_objset) return (B_TRUE); if ((int64_t)zb->zb_object < 0) return (B_FALSE); tmp_dnp.dn_datablkszsec = spc->spc_datablkszsec; tmp_dnp.dn_indblkshift = spc->spc_indblkshift; if (zbookmark_subtree_completed(dnp, zb, last_zb)) return (B_TRUE); return (B_FALSE); } static void dsl_scan_prefetch(scan_prefetch_ctx_t *spc, blkptr_t *bp, zbookmark_phys_t *zb) { avl_index_t idx; dsl_scan_t *scn = spc->spc_scn; spa_t *spa = scn->scn_dp->dp_spa; scan_prefetch_issue_ctx_t *spic; if (zfs_no_scrub_prefetch) return; if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) return; if (dsl_scan_check_prefetch_resume(spc, zb)) return; scan_prefetch_ctx_add_ref(spc, scn); spic = kmem_alloc(sizeof (scan_prefetch_issue_ctx_t), KM_SLEEP); spic->spic_spc = spc; spic->spic_bp = *bp; spic->spic_zb = *zb; /* * Add the IO to the queue of blocks to prefetch. This allows us to * prioritize blocks that we will need first for the main traversal * thread. 
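 * The queue is an AVL tree ordered by bookmark (see
 * scan_prefetch_queue_compare()), so the prefetch thread always issues the
 * block the traversal will need soonest; a bookmark already present in the
 * tree is simply dropped below.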
*/ mutex_enter(&spa->spa_scrub_lock); if (avl_find(&scn->scn_prefetch_queue, spic, &idx) != NULL) { /* this block is already queued for prefetch */ kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); scan_prefetch_ctx_rele(spc, scn); mutex_exit(&spa->spa_scrub_lock); return; } avl_insert(&scn->scn_prefetch_queue, spic, idx); cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } static void dsl_scan_prefetch_dnode(dsl_scan_t *scn, dnode_phys_t *dnp, uint64_t objset, uint64_t object) { int i; zbookmark_phys_t zb; scan_prefetch_ctx_t *spc; if (dnp->dn_nblkptr == 0 && !(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) return; SET_BOOKMARK(&zb, objset, object, 0, 0); spc = scan_prefetch_ctx_create(scn, dnp, FTAG); for (i = 0; i < dnp->dn_nblkptr; i++) { zb.zb_level = BP_GET_LEVEL(&dnp->dn_blkptr[i]); zb.zb_blkid = i; dsl_scan_prefetch(spc, &dnp->dn_blkptr[i], &zb); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zb.zb_level = 0; zb.zb_blkid = DMU_SPILL_BLKID; dsl_scan_prefetch(spc, &dnp->dn_spill, &zb); } scan_prefetch_ctx_rele(spc, FTAG); } void dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, arc_buf_t *buf, void *private) { scan_prefetch_ctx_t *spc = private; dsl_scan_t *scn = spc->spc_scn; spa_t *spa = scn->scn_dp->dp_spa; /* broadcast that the IO has completed for rate limitting purposes */ mutex_enter(&spa->spa_scrub_lock); ASSERT3U(spa->spa_scrub_inflight, >=, BP_GET_PSIZE(bp)); spa->spa_scrub_inflight -= BP_GET_PSIZE(bp); cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); /* if there was an error or we are done prefetching, just cleanup */ if (buf == NULL || scn->scn_suspending) goto out; if (BP_GET_LEVEL(bp) > 0) { int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; zbookmark_phys_t czb; for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) { SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); dsl_scan_prefetch(spc, cbp, &czb); } } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { dnode_phys_t *cdnp = buf->b_data; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; for (i = 0, cdnp = buf->b_data; i < epb; i += cdnp->dn_extra_slots + 1, cdnp += cdnp->dn_extra_slots + 1) { dsl_scan_prefetch_dnode(scn, cdnp, zb->zb_objset, zb->zb_blkid * epb + i); } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { objset_phys_t *osp = buf->b_data; dsl_scan_prefetch_dnode(scn, &osp->os_meta_dnode, zb->zb_objset, DMU_META_DNODE_OBJECT); if (OBJSET_BUF_HAS_USERUSED(buf)) { dsl_scan_prefetch_dnode(scn, &osp->os_groupused_dnode, zb->zb_objset, DMU_GROUPUSED_OBJECT); dsl_scan_prefetch_dnode(scn, &osp->os_userused_dnode, zb->zb_objset, DMU_USERUSED_OBJECT); } } out: if (buf != NULL) arc_buf_destroy(buf, private); scan_prefetch_ctx_rele(spc, scn); } /* ARGSUSED */ static void dsl_scan_prefetch_thread(void *arg) { dsl_scan_t *scn = arg; spa_t *spa = scn->scn_dp->dp_spa; vdev_t *rvd = spa->spa_root_vdev; uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight; scan_prefetch_issue_ctx_t *spic; /* loop until we are told to stop */ while (!scn->scn_prefetch_stop) { arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PRESCIENT_PREFETCH | ARC_FLAG_PREFETCH; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; mutex_enter(&spa->spa_scrub_lock); /* * Wait until we have an IO to issue and are not above our * maximum in flight limit. 
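 * The limit is accounted in bytes: spa_scrub_inflight is charged with
 * BP_GET_PSIZE() when a prefetch is issued and credited back in
 * dsl_scan_prefetch_cb(), and issuing pauses once it reaches
 * scn_maxinflight_bytes.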
*/ while (!scn->scn_prefetch_stop && (avl_numnodes(&scn->scn_prefetch_queue) == 0 || spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes)) { cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); } /* recheck if we should stop since we waited for the cv */ if (scn->scn_prefetch_stop) { mutex_exit(&spa->spa_scrub_lock); break; } /* remove the prefetch IO from the tree */ spic = avl_first(&scn->scn_prefetch_queue); spa->spa_scrub_inflight += BP_GET_PSIZE(&spic->spic_bp); avl_remove(&scn->scn_prefetch_queue, spic); mutex_exit(&spa->spa_scrub_lock); /* issue the prefetch asynchronously */ (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, &spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc, ZIO_PRIORITY_SCRUB, zio_flags, &flags, &spic->spic_zb); kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); } ASSERT(scn->scn_prefetch_stop); /* free any prefetches we didn't get to complete */ mutex_enter(&spa->spa_scrub_lock); while ((spic = avl_first(&scn->scn_prefetch_queue)) != NULL) { avl_remove(&scn->scn_prefetch_queue, spic); scan_prefetch_ctx_rele(spic->spic_spc, scn); kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); } ASSERT0(avl_numnodes(&scn->scn_prefetch_queue)); mutex_exit(&spa->spa_scrub_lock); } static boolean_t dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, const zbookmark_phys_t *zb) { /* * We never skip over user/group accounting objects (obj<0) */ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) && (int64_t)zb->zb_object >= 0) { /* * If we already visited this bp & everything below (in * a prior txg sync), don't bother doing it again. */ if (zbookmark_subtree_completed(dnp, zb, &scn->scn_phys.scn_bookmark)) return (B_TRUE); /* * If we found the block we're trying to resume from, or * we went past it to a different object, zero it out to * indicate that it's OK to start checking for suspending * again. */ if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) { dprintf("resuming at %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, (longlong_t)zb->zb_level, (longlong_t)zb->zb_blkid); bzero(&scn->scn_phys.scn_bookmark, sizeof (*zb)); } } return (B_FALSE); } static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, dmu_tx_t *tx); static void dsl_scan_visitdnode( dsl_scan_t *, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx); /* * Return nonzero on i/o error. * Return new buf to write out in *bufp. 
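 * Indirect, dnode and objset blocks are read with ARC_FLAG_WAIT and each
 * child is handed to dsl_scan_visitbp()/dsl_scan_visitdnode(); a read error
 * bumps scn_errors and is returned to the caller.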
*/ static int dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_phys_t *zb, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; int err; if (BP_GET_LEVEL(bp) > 0) { arc_flags_t flags = ARC_FLAG_WAIT; int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; arc_buf_t *buf; err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) { zbookmark_phys_t czb; SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); dsl_scan_visitbp(cbp, &czb, dnp, ds, scn, ostype, tx); } arc_buf_destroy(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { arc_flags_t flags = ARC_FLAG_WAIT; dnode_phys_t *cdnp; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_buf_t *buf; err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } for (i = 0, cdnp = buf->b_data; i < epb; i += cdnp->dn_extra_slots + 1, cdnp += cdnp->dn_extra_slots + 1) { dsl_scan_visitdnode(scn, ds, ostype, cdnp, zb->zb_blkid * epb + i, tx); } arc_buf_destroy(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; arc_buf_t *buf; err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } osp = buf->b_data; dsl_scan_visitdnode(scn, ds, osp->os_type, &osp->os_meta_dnode, DMU_META_DNODE_OBJECT, tx); if (OBJSET_BUF_HAS_USERUSED(buf)) { /* * We also always visit user/group accounting * objects, and never skip them, even if we are * suspending. This is necessary so that the space * deltas from this txg get integrated. */ dsl_scan_visitdnode(scn, ds, osp->os_type, &osp->os_groupused_dnode, DMU_GROUPUSED_OBJECT, tx); dsl_scan_visitdnode(scn, ds, osp->os_type, &osp->os_userused_dnode, DMU_USERUSED_OBJECT, tx); } arc_buf_destroy(buf, &buf); } return (0); } static void dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx) { int j; for (j = 0; j < dnp->dn_nblkptr; j++) { zbookmark_phys_t czb; SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object, dnp->dn_nlevels - 1, j); dsl_scan_visitbp(&dnp->dn_blkptr[j], &czb, dnp, ds, scn, ostype, tx); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zbookmark_phys_t czb; SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object, 0, DMU_SPILL_BLKID); dsl_scan_visitbp(DN_SPILL_BLKPTR(dnp), &czb, dnp, ds, scn, ostype, tx); } } /* * The arguments are in this order because mdb can only print the * first 5; we want them to be useful. */ static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; blkptr_t *bp_toread = NULL; if (dsl_scan_check_suspend(scn, zb)) return; if (dsl_scan_check_resume(scn, dnp, zb)) return; scn->scn_visited_this_txg++; dprintf_bp(bp, "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx bp=%p", ds, ds ? 
ds->ds_object : 0, zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, bp); if (BP_IS_HOLE(bp)) { scn->scn_holes_this_txg++; return; } if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) { scn->scn_lt_min_this_txg++; return; } bp_toread = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); *bp_toread = *bp; if (dsl_scan_recurse(scn, ds, ostype, dnp, bp_toread, zb, tx) != 0) return; /* * If dsl_scan_ddt() has already visited this block, it will have * already done any translations or scrubbing, so don't call the * callback again. */ if (ddt_class_contains(dp->dp_spa, scn->scn_phys.scn_ddt_class_max, bp)) { scn->scn_ddt_contained_this_txg++; goto out; } /* * If this block is from the future (after cur_max_txg), then we * are doing this on behalf of a deleted snapshot, and we will * revisit the future block on the next pass of this dataset. * Don't scan it now unless we need to because something * under it was modified. */ if (BP_PHYSICAL_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) { scn->scn_gt_max_this_txg++; goto out; } scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); out: kmem_free(bp_toread, sizeof (blkptr_t)); } static void dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) { zbookmark_phys_t zb; scan_prefetch_ctx_t *spc; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) { SET_BOOKMARK(&scn->scn_prefetch_bookmark, zb.zb_objset, 0, 0, 0); } else { scn->scn_prefetch_bookmark = scn->scn_phys.scn_bookmark; } scn->scn_objsets_visited_this_txg++; spc = scan_prefetch_ctx_create(scn, NULL, FTAG); dsl_scan_prefetch(spc, bp, &zb); scan_prefetch_ctx_rele(spc, FTAG); dsl_scan_visitbp(bp, &zb, NULL, ds, scn, DMU_OST_NONE, tx); dprintf_ds(ds, "finished scan%s", ""); } static void ds_destroyed_scn_phys(dsl_dataset_t *ds, dsl_scan_phys_t *scn_phys) { if (scn_phys->scn_bookmark.zb_objset == ds->ds_object) { if (ds->ds_is_snapshot) { /* * Note: * - scn_cur_{min,max}_txg stays the same. * - Setting the flag is not really necessary if * scn_cur_max_txg == scn_max_txg, because there * is nothing after this snapshot that we care * about. However, we set it anyway and then * ignore it when we retraverse it in * dsl_scan_visitds(). */ scn_phys->scn_bookmark.zb_objset = dsl_dataset_phys(ds)->ds_next_snap_obj; zfs_dbgmsg("destroying ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)-> ds_next_snap_obj); scn_phys->scn_flags |= DSF_VISIT_DS_AGAIN; } else { SET_BOOKMARK(&scn_phys->scn_bookmark, ZB_DESTROYED_OBJSET, 0, 0, 0); zfs_dbgmsg("destroying ds %llu; currently traversing; " "reset bookmark to -1,0,0,0", (u_longlong_t)ds->ds_object); } } } /* * Invoked when a dataset is destroyed. We need to make sure that: * * 1) If it is the dataset that was currently being scanned, we write * a new dsl_scan_phys_t and marking the objset reference in it * as destroyed. * 2) Remove it from the work queue, if it was present. * * If the dataset was actually a snapshot, instead of marking the dataset * as destroyed, we instead substitute the next snapshot in line. 
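 * Both copies of the work queue are updated: the in-memory scn_queue and,
 * if the dataset is present there, the on-disk ZAP queue object; the
 * resulting state is then written out via dsl_scan_sync_state(...,
 * SYNC_CACHED).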
*/ void dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; uint64_t mintxg; if (!dsl_scan_is_running(scn)) return; ds_destroyed_scn_phys(ds, &scn->scn_phys); ds_destroyed_scn_phys(ds, &scn->scn_phys_cached); if (scan_ds_queue_contains(scn, ds->ds_object, &mintxg)) { scan_ds_queue_remove(scn, ds->ds_object); if (ds->ds_is_snapshot) scan_ds_queue_insert(scn, dsl_dataset_phys(ds)->ds_next_snap_obj, mintxg); } if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); if (ds->ds_is_snapshot) { /* * We keep the same mintxg; it could be > * ds_creation_txg if the previous snapshot was * deleted too. */ VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, dsl_dataset_phys(ds)->ds_next_snap_obj, mintxg, tx) == 0); zfs_dbgmsg("destroying ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)-> ds_next_snap_obj); } else { zfs_dbgmsg("destroying ds %llu; in queue; removing", (u_longlong_t)ds->ds_object); } } /* * dsl_scan_sync() should be called after this, and should sync * out our changed state, but just to be safe, do it here. */ dsl_scan_sync_state(scn, tx, SYNC_CACHED); } static void ds_snapshotted_bookmark(dsl_dataset_t *ds, zbookmark_phys_t *scn_bookmark) { if (scn_bookmark->zb_objset == ds->ds_object) { scn_bookmark->zb_objset = dsl_dataset_phys(ds)->ds_prev_snap_obj; zfs_dbgmsg("snapshotting ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)->ds_prev_snap_obj); } } /* * Called when a dataset is snapshotted. If we were currently traversing * this snapshot, we reset our bookmark to point at the newly created * snapshot. We also modify our work queue to remove the old snapshot and * replace with the new one. 
*/ void dsl_scan_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; uint64_t mintxg; if (!dsl_scan_is_running(scn)) return; ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); ds_snapshotted_bookmark(ds, &scn->scn_phys.scn_bookmark); ds_snapshotted_bookmark(ds, &scn->scn_phys_cached.scn_bookmark); if (scan_ds_queue_contains(scn, ds->ds_object, &mintxg)) { scan_ds_queue_remove(scn, ds->ds_object); scan_ds_queue_insert(scn, dsl_dataset_phys(ds)->ds_prev_snap_obj, mintxg); } if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, dsl_dataset_phys(ds)->ds_prev_snap_obj, mintxg, tx) == 0); zfs_dbgmsg("snapshotting ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds->ds_object, (u_longlong_t)dsl_dataset_phys(ds)->ds_prev_snap_obj); } dsl_scan_sync_state(scn, tx, SYNC_CACHED); } static void ds_clone_swapped_bookmark(dsl_dataset_t *ds1, dsl_dataset_t *ds2, zbookmark_phys_t *scn_bookmark) { if (scn_bookmark->zb_objset == ds1->ds_object) { scn_bookmark->zb_objset = ds2->ds_object; zfs_dbgmsg("clone_swap ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); } else if (scn_bookmark->zb_objset == ds2->ds_object) { scn_bookmark->zb_objset = ds1->ds_object; zfs_dbgmsg("clone_swap ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds2->ds_object, (u_longlong_t)ds1->ds_object); } } /* - * Called when a parent dataset and its clone are swapped. If we were + * Called when an origin dataset and its clone are swapped. If we were * currently traversing the dataset, we need to switch to traversing the - * newly promoted parent. + * newly promoted clone. */ void dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) { dsl_pool_t *dp = ds1->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; - uint64_t mintxg; + uint64_t mintxg1, mintxg2; + boolean_t ds1_queued, ds2_queued; if (!dsl_scan_is_running(scn)) return; ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark); - if (scan_ds_queue_contains(scn, ds1->ds_object, &mintxg)) { - scan_ds_queue_remove(scn, ds1->ds_object); - scan_ds_queue_insert(scn, ds2->ds_object, mintxg); + /* + * Handle the in-memory scan queue. + */ + ds1_queued = scan_ds_queue_contains(scn, ds1->ds_object, &mintxg1); + ds2_queued = scan_ds_queue_contains(scn, ds2->ds_object, &mintxg2); + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } - if (scan_ds_queue_contains(scn, ds2->ds_object, &mintxg)) { + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * The swapping code below would not handle this case correctly, + * since we can't insert ds2 if it is already there. That's + * because scan_ds_queue_insert() prohibits a duplicate insert + * and panics. 
+ */ + } else if (ds1_queued) { + scan_ds_queue_remove(scn, ds1->ds_object); + scan_ds_queue_insert(scn, ds2->ds_object, mintxg1); + } else if (ds2_queued) { scan_ds_queue_remove(scn, ds2->ds_object); - scan_ds_queue_insert(scn, ds1->ds_object, mintxg); + scan_ds_queue_insert(scn, ds1->ds_object, mintxg2); } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds1->ds_object, &mintxg) == 0) { - int err; - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + /* + * Handle the on-disk scan queue. + * The on-disk state is an out-of-date version of the in-memory state, + * so the in-memory and on-disk values for ds1_queued and ds2_queued may + * be different. Therefore we need to apply the swap logic to the + * on-disk state independently of the in-memory state. + */ + ds1_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, &mintxg1) == 0; + ds2_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg2) == 0; + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * Alternatively, we could check for EEXIST from + * zap_add_int_key() and back out to the original state, but + * that would be more work than checking for this case upfront. + */ + } else if (ds1_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); - err = zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); - VERIFY(err == 0 || err == EEXIST); - if (err == EEXIST) { - /* Both were there to begin with */ - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, - ds1->ds_object, mintxg, tx)); - } + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg1, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); - } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds2->ds_object, &mintxg) == 0) { - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + } else if (ds2_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg2, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds2->ds_object, (u_longlong_t)ds1->ds_object); } dsl_scan_sync_state(scn, tx, SYNC_CACHED); } /* ARGSUSED */ static int enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { uint64_t originobj = *(uint64_t *)arg; dsl_dataset_t *ds; int err; dsl_scan_t *scn = dp->dp_scan; if (dsl_dir_phys(hds->ds_dir)->dd_origin_obj != originobj) 
return (0); err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); while (dsl_dataset_phys(ds)->ds_prev_snap_obj != originobj) { dsl_dataset_t *prev; err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); dsl_dataset_rele(ds, FTAG); if (err) return (err); ds = prev; } scan_ds_queue_insert(scn, ds->ds_object, dsl_dataset_phys(ds)->ds_prev_snap_txg); dsl_dataset_rele(ds, FTAG); return (0); } static void dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; dsl_dataset_t *ds; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); if (scn->scn_phys.scn_cur_min_txg >= scn->scn_phys.scn_max_txg) { /* * This can happen if this snapshot was created after the * scan started, and we already completed a previous snapshot * that was created after the scan started. This snapshot * only references blocks with: * * birth < our ds_creation_txg * cur_min_txg is no less than ds_creation_txg. * We have already visited these blocks. * or * birth > scn_max_txg * The scan requested not to visit these blocks. * * Subsequent snapshots (and clones) can reference our * blocks, or blocks with even higher birth times. * Therefore we do not need to visit them either, * so we do not add them to the work queue. * * Note that checking for cur_min_txg >= cur_max_txg * is not sufficient, because in that case we may need to * visit subsequent snapshots. This happens when min_txg > 0, * which raises cur_min_txg. In this case we will visit * this dataset but skip all of its blocks, because the * rootbp's birth time is < cur_min_txg. Then we will * add the next snapshots/clones to the work queue. */ char *dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanning dataset %llu (%s) is unnecessary because " "cur_min_txg (%llu) >= max_txg (%llu)", (longlong_t)dsobj, dsname, (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_max_txg); kmem_free(dsname, MAXNAMELEN); goto out; } /* * Only the ZIL in the head (non-snapshot) is valid. Even though * snapshots can have ZIL block pointers (which may be the same * BP as in the head), they must be ignored. In addition, $ORIGIN * doesn't have a objset (i.e. its ds_bp is a hole) so we don't * need to look for a ZIL in it either. So we traverse the ZIL here, * rather than in scan_recurse(), because the regular snapshot * block-sharing rules don't apply to it. */ if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !dsl_dataset_is_snapshot(ds) && (dp->dp_origin_snap == NULL || ds->ds_dir != dp->dp_origin_snap->ds_dir)) { objset_t *os; if (dmu_objset_from_ds(ds, &os) != 0) { goto out; } dsl_scan_zil(dp, &os->os_zil_header); } /* * Iterate over the bps in this ds. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " "suspending=%u", (longlong_t)dsobj, dsname, (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_cur_max_txg, (int)scn->scn_suspending); kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); if (scn->scn_suspending) goto out; /* * We've finished this pass over this dataset. */ /* * If we did not completely visit this dataset, do another pass. 
*/ if (scn->scn_phys.scn_flags & DSF_VISIT_DS_AGAIN) { zfs_dbgmsg("incomplete pass; visiting again"); scn->scn_phys.scn_flags &= ~DSF_VISIT_DS_AGAIN; scan_ds_queue_insert(scn, ds->ds_object, scn->scn_phys.scn_cur_max_txg); goto out; } /* * Add descendent datasets to work queue. */ if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { scan_ds_queue_insert(scn, dsl_dataset_phys(ds)->ds_next_snap_obj, dsl_dataset_phys(ds)->ds_creation_txg); } if (dsl_dataset_phys(ds)->ds_num_children > 1) { boolean_t usenext = B_FALSE; if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { uint64_t count; /* * A bug in a previous version of the code could * cause upgrade_clones_cb() to not set * ds_next_snap_obj when it should, leading to a * missing entry. Therefore we can only use the * next_clones_obj when its count is correct. */ int err = zap_count(dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj, &count); if (err == 0 && count == dsl_dataset_phys(ds)->ds_num_children - 1) usenext = B_TRUE; } if (usenext) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { scan_ds_queue_insert(scn, zfs_strtonum(za.za_name, NULL), dsl_dataset_phys(ds)->ds_creation_txg); } zap_cursor_fini(&zc); } else { VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, enqueue_clones_cb, &ds->ds_object, DS_FIND_CHILDREN)); } } out: dsl_dataset_rele(ds, FTAG); } /* ARGSUSED */ static int enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dsl_dataset_t *ds; int err; dsl_scan_t *scn = dp->dp_scan; err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { dsl_dataset_t *prev; err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } /* * If this is a clone, we don't need to worry about it for now. */ if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) { dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(prev, FTAG); return (0); } dsl_dataset_rele(ds, FTAG); ds = prev; } scan_ds_queue_insert(scn, ds->ds_object, dsl_dataset_phys(ds)->ds_prev_snap_txg); dsl_dataset_rele(ds, FTAG); return (0); } /* ARGSUSED */ void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ddt_entry_t *dde, dmu_tx_t *tx) { const ddt_key_t *ddk = &dde->dde_key; ddt_phys_t *ddp = dde->dde_phys; blkptr_t bp; zbookmark_phys_t zb = { 0 }; int p; if (scn->scn_phys.scn_state != DSS_SCANNING) return; for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0 || ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg) continue; ddt_bp_create(checksum, ddk, ddp, &bp); scn->scn_visited_this_txg++; scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb); } } /* * Scrub/dedup interaction. * * If there are N references to a deduped block, we don't want to scrub it * N times -- ideally, we should scrub it exactly once. * * We leverage the fact that the dde's replication class (enum ddt_class) * is ordered from highest replication class (DDT_CLASS_DITTO) to lowest * (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order. * * To prevent excess scrubbing, the scrub begins by walking the DDT * to find all blocks with refcnt > 1, and scrubs each of these once. * Since there are two replication classes which contain blocks with * refcnt > 1, we scrub the highest replication class (DDT_CLASS_DITTO) first. 
* Finally the top-down scrub begins, only visiting blocks with refcnt == 1. * * There would be nothing more to say if a block's refcnt couldn't change * during a scrub, but of course it can so we must account for changes * in a block's replication class. * * Here's an example of what can occur: * * If a block has refcnt > 1 during the DDT scrub phase, but has refcnt == 1 * when visited during the top-down scrub phase, it will be scrubbed twice. * This negates our scrub optimization, but is otherwise harmless. * * If a block has refcnt == 1 during the DDT scrub phase, but has refcnt > 1 * on each visit during the top-down scrub phase, it will never be scrubbed. * To catch this, ddt_sync_entry() notifies the scrub code whenever a block's * reference class transitions to a higher level (i.e DDT_CLASS_UNIQUE to * DDT_CLASS_DUPLICATE); if it transitions from refcnt == 1 to refcnt > 1 * while a scrub is in progress, it scrubs the block right then. */ static void dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) { ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark; ddt_entry_t dde = { 0 }; int error; uint64_t n = 0; while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) { ddt_t *ddt; if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max) break; dprintf("visiting ddb=%llu/%llu/%llu/%llx\n", (longlong_t)ddb->ddb_class, (longlong_t)ddb->ddb_type, (longlong_t)ddb->ddb_checksum, (longlong_t)ddb->ddb_cursor); /* There should be no pending changes to the dedup table */ ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum]; ASSERT(avl_first(&ddt->ddt_tree) == NULL); dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); n++; if (dsl_scan_check_suspend(scn, NULL)) break; } zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; " "suspending=%u", (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending); ASSERT(error == 0 || error == ENOENT); ASSERT(error != ENOENT || ddb->ddb_class > scn->scn_phys.scn_ddt_class_max); } static uint64_t dsl_scan_ds_maxtxg(dsl_dataset_t *ds) { uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg; if (ds->ds_is_snapshot) return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg)); return (smt); } static void dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) { scan_ds_t *sds; dsl_pool_t *dp = scn->scn_dp; if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= scn->scn_phys.scn_ddt_class_max) { scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; dsl_scan_ddt(scn, tx); if (scn->scn_suspending) return; } if (scn->scn_phys.scn_bookmark.zb_objset == DMU_META_OBJSET) { /* First do the MOS & ORIGIN */ scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; dsl_scan_visit_rootbp(scn, NULL, &dp->dp_meta_rootbp, tx); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); if (scn->scn_suspending) return; if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, enqueue_cb, NULL, DS_FIND_CHILDREN)); } else { dsl_scan_visitds(scn, dp->dp_origin_snap->ds_object, tx); } ASSERT(!scn->scn_suspending); } else if (scn->scn_phys.scn_bookmark.zb_objset != ZB_DESTROYED_OBJSET) { uint64_t dsobj = scn->scn_phys.scn_bookmark.zb_objset; /* * If we were suspended, continue from here. Note if the * ds we were suspended on was deleted, the zb_objset may * be -1, so we will skip this and find a new objset * below. 
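 * (The -1 value is ZB_DESTROYED_OBJSET, which dsl_scan_ds_destroyed() stores
 * in the bookmark when a non-snapshot dataset that was being scanned is
 * destroyed.)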
*/ dsl_scan_visitds(scn, dsobj, tx); if (scn->scn_suspending) return; } /* * In case we suspended right at the end of the ds, zero the * bookmark so we don't think that we're still trying to resume. */ bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t)); /* * Keep pulling things out of the dataset avl queue. Updates to the * persistent zap-object-as-queue happen only at checkpoints. */ while ((sds = avl_first(&scn->scn_queue)) != NULL) { dsl_dataset_t *ds; uint64_t dsobj = sds->sds_dsobj; uint64_t txg = sds->sds_txg; /* dequeue and free the ds from the queue */ scan_ds_queue_remove(scn, dsobj); sds = NULL; /* must not be touched after removal */ /* Set up min / max txg */ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); if (txg != 0) { scn->scn_phys.scn_cur_min_txg = MAX(scn->scn_phys.scn_min_txg, txg); } else { scn->scn_phys.scn_cur_min_txg = MAX(scn->scn_phys.scn_min_txg, dsl_dataset_phys(ds)->ds_prev_snap_txg); } scn->scn_phys.scn_cur_max_txg = dsl_scan_ds_maxtxg(ds); dsl_dataset_rele(ds, FTAG); dsl_scan_visitds(scn, dsobj, tx); if (scn->scn_suspending) return; } /* No more objsets to fetch, we're done */ scn->scn_phys.scn_bookmark.zb_objset = ZB_DESTROYED_OBJSET; ASSERT0(scn->scn_suspending); } static uint64_t dsl_scan_count_leaves(vdev_t *vd) { uint64_t i, leaves = 0; /* we only count leaves that belong to the main pool and are readable */ if (vd->vdev_islog || vd->vdev_isspare || vd->vdev_isl2cache || !vdev_readable(vd)) return (0); if (vd->vdev_ops->vdev_op_leaf) return (1); for (i = 0; i < vd->vdev_children; i++) { leaves += dsl_scan_count_leaves(vd->vdev_child[i]); } return (leaves); } static void scan_io_queues_update_zio_stats(dsl_scan_io_queue_t *q, const blkptr_t *bp) { int i; uint64_t cur_size = 0; for (i = 0; i < BP_GET_NDVAS(bp); i++) { cur_size += DVA_GET_ASIZE(&bp->blk_dva[i]); } q->q_total_zio_size_this_txg += cur_size; q->q_zios_this_txg++; } static void scan_io_queues_update_seg_stats(dsl_scan_io_queue_t *q, uint64_t start, uint64_t end) { q->q_total_seg_size_this_txg += end - start; q->q_segs_this_txg++; } static boolean_t scan_io_queue_check_suspend(dsl_scan_t *scn) { /* See comment in dsl_scan_check_suspend() */ uint64_t curr_time_ns = gethrtime(); uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time; uint64_t sync_time_ns = curr_time_ns - scn->scn_dp->dp_spa->spa_sync_starttime; int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max; int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms : zfs_scrub_min_time_ms; return ((NSEC2MSEC(scan_time_ns) > mintime && (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent || txg_sync_waiting(scn->scn_dp) || NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) || spa_shutting_down(scn->scn_dp->dp_spa)); } /* * Given a list of scan_io_t's in io_list, this issues the io's out to * disk. This consumes the io_list and frees the scan_io_t's. This is * called when emptying queues, either when we're up against the memory * limit or when we have finished scanning. Returns B_TRUE if we stopped * processing the list before we finished. Any zios that were not issued * will remain in the io_list. 
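 * Callers such as scan_io_queues_run_one() check the return value and, if we
 * were suspended, re-insert the remaining scan_io_t's into the queue before
 * exiting.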
*/ static boolean_t scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list) { dsl_scan_t *scn = queue->q_scn; scan_io_t *sio; int64_t bytes_issued = 0; boolean_t suspended = B_FALSE; while ((sio = list_head(io_list)) != NULL) { blkptr_t bp; if (scan_io_queue_check_suspend(scn)) { suspended = B_TRUE; break; } sio2bp(sio, &bp, queue->q_vd->vdev_id); bytes_issued += sio->sio_asize; scan_exec_io(scn->scn_dp, &bp, sio->sio_flags, &sio->sio_zb, queue); (void) list_remove_head(io_list); scan_io_queues_update_zio_stats(queue, &bp); kmem_free(sio, sizeof (*sio)); } atomic_add_64(&scn->scn_bytes_pending, -bytes_issued); return (suspended); } /* * Given a range_seg_t (extent) and a list, this function passes over a * scan queue and gathers up the appropriate ios which fit into that * scan seg (starting from lowest LBA). At the end, we remove the segment * from the q_exts_by_addr range tree. */ static boolean_t scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list) { scan_io_t srch_sio, *sio, *next_sio; avl_index_t idx; uint_t num_sios = 0; int64_t bytes_issued = 0; ASSERT(rs != NULL); ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); srch_sio.sio_offset = rs->rs_start; /* * The exact start of the extent might not contain any matching zios, * so if that's the case, examine the next one in the tree. */ sio = avl_find(&queue->q_sios_by_addr, &srch_sio, &idx); if (sio == NULL) sio = avl_nearest(&queue->q_sios_by_addr, idx, AVL_AFTER); while (sio != NULL && sio->sio_offset < rs->rs_end && num_sios <= 32) { ASSERT3U(sio->sio_offset, >=, rs->rs_start); ASSERT3U(sio->sio_offset + sio->sio_asize, <=, rs->rs_end); next_sio = AVL_NEXT(&queue->q_sios_by_addr, sio); avl_remove(&queue->q_sios_by_addr, sio); bytes_issued += sio->sio_asize; num_sios++; list_insert_tail(list, sio); sio = next_sio; } /* * We limit the number of sios we process at once to 32 to avoid * biting off more than we can chew. If we didn't take everything * in the segment we update it to reflect the work we were able to * complete. Otherwise, we remove it from the range tree entirely. */ if (sio != NULL && sio->sio_offset < rs->rs_end) { range_tree_adjust_fill(queue->q_exts_by_addr, rs, -bytes_issued); range_tree_resize_segment(queue->q_exts_by_addr, rs, sio->sio_offset, rs->rs_end - sio->sio_offset); return (B_TRUE); } else { range_tree_remove(queue->q_exts_by_addr, rs->rs_start, rs->rs_end - rs->rs_start); return (B_FALSE); } } /* * This is called from the queue emptying thread and selects the next * extent from which we are to issue io's. The behavior of this function * depends on the state of the scan, the current memory consumption and * whether or not we are performing a scan shutdown. * 1) We select extents in an elevator algorithm (LBA-order) if the scan * needs to perform a checkpoint * 2) We select the largest available extent if we are up against the * memory limit. * 3) Otherwise we don't select any extents. 
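 * When NULL is returned, the queue-emptying loop in scan_io_queues_run_one()
 * stops issuing from this queue for the current txg.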
*/ static const range_seg_t * scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue) { dsl_scan_t *scn = queue->q_scn; ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); ASSERT(scn->scn_is_sorted); /* handle tunable overrides */ if (scn->scn_checkpointing || scn->scn_clearing) { if (zfs_scan_issue_strategy == 1) { return (range_tree_first(queue->q_exts_by_addr)); } else if (zfs_scan_issue_strategy == 2) { return (avl_first(&queue->q_exts_by_size)); } } /* * During normal clearing, we want to issue our largest segments * first, keeping IO as sequential as possible, and leaving the * smaller extents for later with the hope that they might eventually * grow to larger sequential segments. However, when the scan is * checkpointing, no new extents will be added to the sorting queue, * so the way we are sorted now is as good as it will ever get. * In this case, we instead switch to issuing extents in LBA order. */ if (scn->scn_checkpointing) { return (range_tree_first(queue->q_exts_by_addr)); } else if (scn->scn_clearing) { return (avl_first(&queue->q_exts_by_size)); } else { return (NULL); } } static void scan_io_queues_run_one(void *arg) { dsl_scan_io_queue_t *queue = arg; kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock; boolean_t suspended = B_FALSE; range_seg_t *rs = NULL; scan_io_t *sio = NULL; list_t sio_list; uint64_t bytes_per_leaf = zfs_scan_vdev_limit; uint64_t nr_leaves = dsl_scan_count_leaves(queue->q_vd); ASSERT(queue->q_scn->scn_is_sorted); list_create(&sio_list, sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_list_node)); mutex_enter(q_lock); /* calculate maximum in-flight bytes for this txg (min 1MB) */ queue->q_maxinflight_bytes = MAX(nr_leaves * bytes_per_leaf, 1ULL << 20); /* reset per-queue scan statistics for this txg */ queue->q_total_seg_size_this_txg = 0; queue->q_segs_this_txg = 0; queue->q_total_zio_size_this_txg = 0; queue->q_zios_this_txg = 0; /* loop until we have run out of time or sios */ while ((rs = (range_seg_t*)scan_io_queue_fetch_ext(queue)) != NULL) { uint64_t seg_start = 0, seg_end = 0; boolean_t more_left = B_TRUE; ASSERT(list_is_empty(&sio_list)); /* loop while we still have sios left to process in this rs */ while (more_left) { scan_io_t *first_sio, *last_sio; /* * We have selected which extent needs to be * processed next. Gather up the corresponding sios. */ more_left = scan_io_queue_gather(queue, rs, &sio_list); ASSERT(!list_is_empty(&sio_list)); first_sio = list_head(&sio_list); last_sio = list_tail(&sio_list); seg_end = last_sio->sio_offset + last_sio->sio_asize; if (seg_start == 0) seg_start = first_sio->sio_offset; /* * Issuing sios can take a long time so drop the * queue lock. The sio queue won't be updated by * other threads since we're in syncing context so * we can be sure that our trees will remain exactly * as we left them. */ mutex_exit(q_lock); suspended = scan_io_queue_issue(queue, &sio_list); mutex_enter(q_lock); if (suspended) break; } /* update statistics for debugging purposes */ scan_io_queues_update_seg_stats(queue, seg_start, seg_end); if (suspended) break; } /* If we were suspended in the middle of processing, * requeue any unfinished sios and exit. */ while ((sio = list_head(&sio_list)) != NULL) { list_remove(&sio_list, sio); scan_io_queue_insert_impl(queue, sio); } mutex_exit(q_lock); list_destroy(&sio_list); } /* * Performs an emptying run on all scan queues in the pool. This just * punches out one thread per top-level vdev, each of which processes * only that vdev's scan queue. 
We can parallelize the I/O here because * we know that each queue's io's only affect its own top-level vdev. * * This function waits for the queue runs to complete, and must be * called from dsl_scan_sync (or in general, syncing context). */ static void scan_io_queues_run(dsl_scan_t *scn) { spa_t *spa = scn->scn_dp->dp_spa; ASSERT(scn->scn_is_sorted); ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (scn->scn_bytes_pending == 0) return; if (scn->scn_taskq == NULL) { char *tq_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN + 16, KM_SLEEP); int nthreads = spa->spa_root_vdev->vdev_children; /* * We need to make this taskq *always* execute as many * threads in parallel as we have top-level vdevs and no * less, otherwise strange serialization of the calls to * scan_io_queues_run_one can occur during spa_sync runs * and that significantly impacts performance. */ (void) snprintf(tq_name, ZFS_MAX_DATASET_NAME_LEN + 16, "dsl_scan_tq_%s", spa->spa_name); scn->scn_taskq = taskq_create(tq_name, nthreads, minclsyspri, nthreads, nthreads, TASKQ_PREPOPULATE); kmem_free(tq_name, ZFS_MAX_DATASET_NAME_LEN + 16); } for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; mutex_enter(&vd->vdev_scan_io_queue_lock); if (vd->vdev_scan_io_queue != NULL) { VERIFY(taskq_dispatch(scn->scn_taskq, scan_io_queues_run_one, vd->vdev_scan_io_queue, TQ_SLEEP) != TASKQID_INVALID); } mutex_exit(&vd->vdev_scan_io_queue_lock); } /* * Wait for the queues to finish issuing thir IOs for this run * before we return. There may still be IOs in flight at this * point. */ taskq_wait(scn->scn_taskq); } static boolean_t dsl_scan_async_block_should_pause(dsl_scan_t *scn) { uint64_t elapsed_nanosecs; if (zfs_recover) return (B_FALSE); if (scn->scn_visited_this_txg >= zfs_async_block_max_blocks) return (B_TRUE); elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || (NSEC2MSEC(elapsed_nanosecs) > scn->scn_async_block_min_time_ms && txg_sync_waiting(scn->scn_dp)) || spa_shutting_down(scn->scn_dp->dp_spa)); } static int dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_scan_t *scn = arg; if (!scn->scn_is_bptree || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { if (dsl_scan_async_block_should_pause(scn)) return (SET_ERROR(ERESTART)); } zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa, dmu_tx_get_txg(tx), bp, BP_GET_PSIZE(bp), 0)); dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, -bp_get_dsize_sync(scn->scn_dp->dp_spa, bp), -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx); scn->scn_visited_this_txg++; return (0); } static void dsl_scan_update_stats(dsl_scan_t *scn) { spa_t *spa = scn->scn_dp->dp_spa; uint64_t i; uint64_t seg_size_total = 0, zio_size_total = 0; uint64_t seg_count_total = 0, zio_count_total = 0; for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) { vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; dsl_scan_io_queue_t *queue = vd->vdev_scan_io_queue; if (queue == NULL) continue; seg_size_total += queue->q_total_seg_size_this_txg; zio_size_total += queue->q_total_zio_size_this_txg; seg_count_total += queue->q_segs_this_txg; zio_count_total += queue->q_zios_this_txg; } if (seg_count_total == 0 || zio_count_total == 0) { scn->scn_avg_seg_size_this_txg = 0; scn->scn_avg_zio_size_this_txg = 0; scn->scn_segs_this_txg = 0; scn->scn_zios_this_txg = 0; return; } scn->scn_avg_seg_size_this_txg = seg_size_total / seg_count_total; scn->scn_avg_zio_size_this_txg 
= zio_size_total / zio_count_total; scn->scn_segs_this_txg = seg_count_total; scn->scn_zios_this_txg = zio_count_total; } static int dsl_scan_obsolete_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_scan_t *scn = arg; const dva_t *dva = &bp->blk_dva[0]; if (dsl_scan_async_block_should_pause(scn)) return (SET_ERROR(ERESTART)); spa_vdev_indirect_mark_obsolete(scn->scn_dp->dp_spa, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), tx); scn->scn_visited_this_txg++; return (0); } boolean_t dsl_scan_active(dsl_scan_t *scn) { spa_t *spa = scn->scn_dp->dp_spa; uint64_t used = 0, comp, uncomp; if (spa->spa_load_state != SPA_LOAD_NONE) return (B_FALSE); if (spa_shutting_down(spa)) return (B_FALSE); if ((dsl_scan_is_running(scn) && !dsl_scan_is_paused_scrub(scn)) || (scn->scn_async_destroying && !scn->scn_async_stalled)) return (B_TRUE); if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) { (void) bpobj_space(&scn->scn_dp->dp_free_bpobj, &used, &comp, &uncomp); } return (used != 0); } static boolean_t dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize, uint64_t phys_birth) { vdev_t *vd; vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva)); if (vd->vdev_ops == &vdev_indirect_ops) { /* * The indirect vdev can point to multiple * vdevs. For simplicity, always create * the resilver zio_t. zio_vdev_io_start() * will bypass the child resilver i/o's if * they are on vdevs that don't have DTL's. */ return (B_TRUE); } if (DVA_GET_GANG(dva)) { /* * Gang members may be spread across multiple * vdevs, so the best estimate we have is the * scrub range, which has already been checked. * XXX -- it would be better to change our * allocation policy to ensure that all * gang members reside on the same vdev. */ return (B_TRUE); } /* * Check if the txg falls within the range which must be * resilvered. DVAs outside this range can always be skipped. */ if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1)) return (B_FALSE); /* * Check if the top-level vdev must resilver this offset. * When the offset does not intersect with a dirty leaf DTL * then it may be possible to skip the resilver IO. The psize * is provided instead of asize to simplify the check for RAIDZ. 
*/ if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize)) return (B_FALSE); return (B_TRUE); } static int dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx) { int err = 0; dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; if (spa_suspend_async_destroy(spa)) return (0); if (zfs_free_bpobj_enabled && spa_version(spa) >= SPA_VERSION_DEADLISTS) { scn->scn_is_bptree = B_FALSE; scn->scn_async_block_min_time_ms = zfs_free_min_time_ms; scn->scn_zio_root = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); err = bpobj_iterate(&dp->dp_free_bpobj, dsl_scan_free_block_cb, scn, tx); VERIFY0(zio_wait(scn->scn_zio_root)); scn->scn_zio_root = NULL; if (err != 0 && err != ERESTART) zfs_panic_recover("error %u from bpobj_iterate()", err); } if (err == 0 && spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { ASSERT(scn->scn_async_destroying); scn->scn_is_bptree = B_TRUE; scn->scn_zio_root = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); err = bptree_iterate(dp->dp_meta_objset, dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, scn, tx); VERIFY0(zio_wait(scn->scn_zio_root)); scn->scn_zio_root = NULL; if (err == EIO || err == ECKSUM) { err = 0; } else if (err != 0 && err != ERESTART) { zfs_panic_recover("error %u from " "traverse_dataset_destroyed()", err); } if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) { /* finished; deactivate async destroy feature */ spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx); ASSERT(!spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)); VERIFY0(zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, tx)); VERIFY0(bptree_free(dp->dp_meta_objset, dp->dp_bptree_obj, tx)); dp->dp_bptree_obj = 0; scn->scn_async_destroying = B_FALSE; scn->scn_async_stalled = B_FALSE; } else { /* * If we didn't make progress, mark the async * destroy as stalled, so that we will not initiate * a spa_sync() on its behalf. Note that we only * check this if we are not finished, because if the * bptree had no blocks for us to visit, we can * finish without "making progress". */ scn->scn_async_stalled = (scn->scn_visited_this_txg == 0); } } if (scn->scn_visited_this_txg) { zfs_dbgmsg("freed %llu blocks in %llums from " "free_bpobj/bptree txg %llu; err=%d", (longlong_t)scn->scn_visited_this_txg, (longlong_t) NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)tx->tx_txg, err); scn->scn_visited_this_txg = 0; /* * Write out changes to the DDT that may be required as a * result of the blocks freed. This ensures that the DDT * is clean when a scrub/resilver runs. */ ddt_sync(spa, tx->tx_txg); } if (err != 0) return (err); if (dp->dp_free_dir != NULL && !scn->scn_async_destroying && zfs_free_leak_on_eio && (dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes != 0 || dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes != 0 || dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes != 0)) { /* * We have finished background destroying, but there is still * some space left in the dp_free_dir. Transfer this leaked * space to the dp_leak_dir. 
*/ if (dp->dp_leak_dir == NULL) { rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); (void) dsl_dir_create_sync(dp, dp->dp_root_dir, LEAK_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, LEAK_DIR_NAME, &dp->dp_leak_dir)); rrw_exit(&dp->dp_config_rwlock, FTAG); } dsl_dir_diduse_space(dp->dp_leak_dir, DD_USED_HEAD, dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes, dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes, dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, -dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes, -dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes, -dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx); } if (dp->dp_free_dir != NULL && !scn->scn_async_destroying) { /* finished; verify that space accounting went to zero */ ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes); ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes); ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes); } EQUIV(bpobj_is_open(&dp->dp_obsolete_bpobj), 0 == zap_contains(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_OBSOLETE_BPOBJ)); if (err == 0 && bpobj_is_open(&dp->dp_obsolete_bpobj)) { ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_OBSOLETE_COUNTS)); scn->scn_is_bptree = B_FALSE; scn->scn_async_block_min_time_ms = zfs_obsolete_min_time_ms; err = bpobj_iterate(&dp->dp_obsolete_bpobj, dsl_scan_obsolete_block_cb, scn, tx); if (err != 0 && err != ERESTART) zfs_panic_recover("error %u from bpobj_iterate()", err); if (bpobj_is_empty(&dp->dp_obsolete_bpobj)) dsl_pool_destroy_obsolete_bpobj(dp, tx); } return (0); } /* * This is the primary entry point for scans that is called from syncing * context. Scans must happen entirely during syncing context so that we * can guarantee that blocks we are currently scanning will not change out * from under us. While a scan is active, this function controls how quickly * transaction groups proceed, instead of the normal handling provided by * txg_sync_thread(). */ void dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) { dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; int err = 0; state_sync_type_t sync_type = SYNC_OPTIONAL; /* * Check for scn_restart_txg before checking spa_load_state, so * that we can restart an old-style scan while the pool is being * imported (see dsl_scan_init). */ if (dsl_scan_restarting(scn, tx)) { pool_scan_func_t func = POOL_SCAN_SCRUB; dsl_scan_done(scn, B_FALSE, tx); if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) func = POOL_SCAN_RESILVER; zfs_dbgmsg("restarting scan func=%u txg=%llu", func, (longlong_t)tx->tx_txg); dsl_scan_setup_sync(&func, tx); } /* * Only process scans in sync pass 1. */ if (spa_sync_pass(dp->dp_spa) > 1) return; /* * If the spa is shutting down, then stop scanning. This will * ensure that the scan does not dirty any new data during the * shutdown phase. */ if (spa_shutting_down(spa)) return; /* * If the scan is inactive due to a stalled async destroy, try again. */ if (!scn->scn_async_stalled && !dsl_scan_active(scn)) return; /* reset scan statistics */ scn->scn_visited_this_txg = 0; scn->scn_holes_this_txg = 0; scn->scn_lt_min_this_txg = 0; scn->scn_gt_max_this_txg = 0; scn->scn_ddt_contained_this_txg = 0; scn->scn_objsets_visited_this_txg = 0; scn->scn_avg_seg_size_this_txg = 0; scn->scn_segs_this_txg = 0; scn->scn_avg_zio_size_this_txg = 0; scn->scn_zios_this_txg = 0; scn->scn_suspending = B_FALSE; scn->scn_sync_start_time = gethrtime(); spa->spa_scrub_active = B_TRUE; /* * First process the async destroys.
If we pause, don't do * any scrubbing or resilvering. This ensures that there are no * async destroys while we are scanning, so the scan code doesn't * have to worry about traversing it. It is also faster to free the * blocks than to scrub them. */ err = dsl_process_async_destroys(dp, tx); if (err != 0) return; if (!dsl_scan_is_running(scn) || dsl_scan_is_paused_scrub(scn)) return; /* * Wait a few txgs after importing to begin scanning so that * we can get the pool imported quickly. */ if (spa->spa_syncing_txg < spa->spa_first_txg + SCAN_IMPORT_WAIT_TXGS) return; /* * It is possible to switch from unsorted to sorted at any time, * but afterwards the scan will remain sorted unless reloaded from * a checkpoint after a reboot. */ if (!zfs_scan_legacy) { scn->scn_is_sorted = B_TRUE; if (scn->scn_last_checkpoint == 0) scn->scn_last_checkpoint = ddi_get_lbolt(); } /* * For sorted scans, determine what kind of work we will be doing * this txg based on our memory limitations and whether or not we * need to perform a checkpoint. */ if (scn->scn_is_sorted) { /* * If we are over our checkpoint interval, set scn_clearing * so that we can begin checkpointing immediately. The * checkpoint allows us to save a consistent bookmark * representing how much data we have scrubbed so far. * Otherwise, use the memory limit to determine if we should * scan for metadata or start issuing scrub IOs. We accumulate * metadata until we hit our hard memory limit at which point * we issue scrub IOs until we are at our soft memory limit. */ if (scn->scn_checkpointing || ddi_get_lbolt() - scn->scn_last_checkpoint > SEC_TO_TICK(zfs_scan_checkpoint_intval)) { if (!scn->scn_checkpointing) zfs_dbgmsg("begin scan checkpoint"); scn->scn_checkpointing = B_TRUE; scn->scn_clearing = B_TRUE; } else { boolean_t should_clear = dsl_scan_should_clear(scn); if (should_clear && !scn->scn_clearing) { zfs_dbgmsg("begin scan clearing"); scn->scn_clearing = B_TRUE; } else if (!should_clear && scn->scn_clearing) { zfs_dbgmsg("finish scan clearing"); scn->scn_clearing = B_FALSE; } } } else { ASSERT0(scn->scn_checkpointing); ASSERT0(scn->scn_clearing); } if (!scn->scn_clearing && scn->scn_done_txg == 0) { /* Need to scan metadata for more blocks to scrub */ dsl_scan_phys_t *scnp = &scn->scn_phys; taskqid_t prefetch_tqid; uint64_t bytes_per_leaf = zfs_scan_vdev_limit; uint64_t nr_leaves = dsl_scan_count_leaves(spa->spa_root_vdev); /* * Calculate the max number of in-flight bytes for pool-wide * scanning operations (minimum 1MB). Limits for the issuing * phase are done per top-level vdev and are handled separately.
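 * For example, with zfs_scan_vdev_limit at 4 MB per leaf and 12 readable
 * leaves, this allows roughly 48 MB of scan I/O in flight pool-wide.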
*/ scn->scn_maxinflight_bytes = MAX(nr_leaves * bytes_per_leaf, 1ULL << 20); if (scnp->scn_ddt_bookmark.ddb_class <= scnp->scn_ddt_class_max) { ASSERT(ZB_IS_ZERO(&scnp->scn_bookmark)); zfs_dbgmsg("doing scan sync txg %llu; " "ddt bm=%llu/%llu/%llu/%llx", (longlong_t)tx->tx_txg, (longlong_t)scnp->scn_ddt_bookmark.ddb_class, (longlong_t)scnp->scn_ddt_bookmark.ddb_type, (longlong_t)scnp->scn_ddt_bookmark.ddb_checksum, (longlong_t)scnp->scn_ddt_bookmark.ddb_cursor); } else { zfs_dbgmsg("doing scan sync txg %llu; " "bm=%llu/%llu/%llu/%llu", (longlong_t)tx->tx_txg, (longlong_t)scnp->scn_bookmark.zb_objset, (longlong_t)scnp->scn_bookmark.zb_object, (longlong_t)scnp->scn_bookmark.zb_level, (longlong_t)scnp->scn_bookmark.zb_blkid); } scn->scn_zio_root = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_CANFAIL); scn->scn_prefetch_stop = B_FALSE; prefetch_tqid = taskq_dispatch(dp->dp_sync_taskq, dsl_scan_prefetch_thread, scn, TQ_SLEEP); ASSERT(prefetch_tqid != TASKQID_INVALID); dsl_pool_config_enter(dp, FTAG); dsl_scan_visit(scn, tx); dsl_pool_config_exit(dp, FTAG); mutex_enter(&dp->dp_spa->spa_scrub_lock); scn->scn_prefetch_stop = B_TRUE; cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&dp->dp_spa->spa_scrub_lock); taskq_wait_id(dp->dp_sync_taskq, prefetch_tqid); (void) zio_wait(scn->scn_zio_root); scn->scn_zio_root = NULL; zfs_dbgmsg("scan visited %llu blocks in %llums " "(%llu os's, %llu holes, %llu < mintxg, " "%llu in ddt, %llu > maxtxg)", (longlong_t)scn->scn_visited_this_txg, (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)scn->scn_objsets_visited_this_txg, (longlong_t)scn->scn_holes_this_txg, (longlong_t)scn->scn_lt_min_this_txg, (longlong_t)scn->scn_ddt_contained_this_txg, (longlong_t)scn->scn_gt_max_this_txg); if (!scn->scn_suspending) { ASSERT0(avl_numnodes(&scn->scn_queue)); scn->scn_done_txg = tx->tx_txg + 1; if (scn->scn_is_sorted) { scn->scn_checkpointing = B_TRUE; scn->scn_clearing = B_TRUE; } zfs_dbgmsg("scan complete txg %llu", (longlong_t)tx->tx_txg); } } else if (scn->scn_is_sorted && scn->scn_bytes_pending != 0) { /* need to issue scrubbing IOs from per-vdev queues */ scn->scn_zio_root = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_CANFAIL); scan_io_queues_run(scn); (void) zio_wait(scn->scn_zio_root); scn->scn_zio_root = NULL; /* calculate and dprintf the current memory usage */ (void) dsl_scan_should_clear(scn); dsl_scan_update_stats(scn); zfs_dbgmsg("scrubbed %llu blocks (%llu segs) in %llums " "(avg_block_size = %llu, avg_seg_size = %llu)", (longlong_t)scn->scn_zios_this_txg, (longlong_t)scn->scn_segs_this_txg, (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)scn->scn_avg_zio_size_this_txg, (longlong_t)scn->scn_avg_seg_size_this_txg); } else if (scn->scn_done_txg != 0 && scn->scn_done_txg <= tx->tx_txg) { /* Finished with everything. Mark the scrub as complete */ zfs_dbgmsg("scan issuing complete txg %llu", (longlong_t)tx->tx_txg); ASSERT3U(scn->scn_done_txg, !=, 0); ASSERT0(spa->spa_scrub_inflight); ASSERT0(scn->scn_bytes_pending); dsl_scan_done(scn, B_TRUE, tx); sync_type = SYNC_MANDATORY; } dsl_scan_sync_state(scn, tx, sync_type); } static void count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp) { int i; /* update the spa's stats on how many bytes we have issued */ for (i = 0; i < BP_GET_NDVAS(bp); i++) { atomic_add_64(&scn->scn_dp->dp_spa->spa_scan_pass_issued, DVA_GET_ASIZE(&bp->blk_dva[i])); } /* * If we resume after a reboot, zab will be NULL; don't record * incomplete stats in that case. 
*/ if (zab == NULL) return; mutex_enter(&zab->zab_lock); for (i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS; int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL; if (t & DMU_OT_NEWTYPE) t = DMU_OT_OTHER; zfs_blkstat_t *zb = &zab->zab_type[l][t]; int equal; zb->zb_count++; zb->zb_asize += BP_GET_ASIZE(bp); zb->zb_lsize += BP_GET_LSIZE(bp); zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_gangs += BP_COUNT_GANG(bp); switch (BP_GET_NDVAS(bp)) { case 2: if (DVA_GET_VDEV(&bp->blk_dva[0]) == DVA_GET_VDEV(&bp->blk_dva[1])) zb->zb_ditto_2_of_2_samevdev++; break; case 3: equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == DVA_GET_VDEV(&bp->blk_dva[1])) + (DVA_GET_VDEV(&bp->blk_dva[0]) == DVA_GET_VDEV(&bp->blk_dva[2])) + (DVA_GET_VDEV(&bp->blk_dva[1]) == DVA_GET_VDEV(&bp->blk_dva[2])); if (equal == 1) zb->zb_ditto_2_of_3_samevdev++; else if (equal == 3) zb->zb_ditto_3_of_3_samevdev++; break; } } mutex_exit(&zab->zab_lock); } static void scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio) { avl_index_t idx; int64_t asize = sio->sio_asize; dsl_scan_t *scn = queue->q_scn; ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); if (avl_find(&queue->q_sios_by_addr, sio, &idx) != NULL) { /* block is already scheduled for reading */ atomic_add_64(&scn->scn_bytes_pending, -asize); kmem_free(sio, sizeof (*sio)); return; } avl_insert(&queue->q_sios_by_addr, sio, idx); range_tree_add(queue->q_exts_by_addr, sio->sio_offset, asize); } /* * Given all the info we got from our metadata scanning process, we * construct a scan_io_t and insert it into the scan sorting queue. The * I/O must already be suitable for us to process. This is controlled * by dsl_scan_enqueue(). */ static void scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i, int zio_flags, const zbookmark_phys_t *zb) { dsl_scan_t *scn = queue->q_scn; scan_io_t *sio = kmem_zalloc(sizeof (*sio), KM_SLEEP); ASSERT0(BP_IS_GANG(bp)); ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); bp2sio(bp, sio, dva_i); sio->sio_flags = zio_flags; sio->sio_zb = *zb; /* * Increment the bytes pending counter now so that we can't * get an integer underflow in case the worker processes the * zio before we get to incrementing this counter. */ atomic_add_64(&scn->scn_bytes_pending, sio->sio_asize); scan_io_queue_insert_impl(queue, sio); } /* * Given a set of I/O parameters as discovered by the metadata traversal * process, attempts to place the I/O into the sorted queues (if allowed), * or immediately executes the I/O. */ static void dsl_scan_enqueue(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, const zbookmark_phys_t *zb) { spa_t *spa = dp->dp_spa; ASSERT(!BP_IS_EMBEDDED(bp)); /* * Gang blocks are hard to issue sequentially, so we just issue them * here immediately instead of queuing them. 
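 * For non-gang blocks in a sorted scan, each DVA of the block is queued
 * separately on the scan queue of its own top-level vdev below.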
*/ if (!dp->dp_scan->scn_is_sorted || BP_IS_GANG(bp)) { scan_exec_io(dp, bp, zio_flags, zb, NULL); return; } for (int i = 0; i < BP_GET_NDVAS(bp); i++) { dva_t dva; vdev_t *vdev; dva = bp->blk_dva[i]; vdev = vdev_lookup_top(spa, DVA_GET_VDEV(&dva)); ASSERT(vdev != NULL); mutex_enter(&vdev->vdev_scan_io_queue_lock); if (vdev->vdev_scan_io_queue == NULL) vdev->vdev_scan_io_queue = scan_io_queue_create(vdev); ASSERT(dp->dp_scan != NULL); scan_io_queue_insert(vdev->vdev_scan_io_queue, bp, i, zio_flags, zb); mutex_exit(&vdev->vdev_scan_io_queue_lock); } } static int dsl_scan_scrub_cb(dsl_pool_t *dp, const blkptr_t *bp, const zbookmark_phys_t *zb) { dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp); size_t psize = BP_GET_PSIZE(bp); boolean_t needs_io; int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; int d; if (phys_birth <= scn->scn_phys.scn_min_txg || phys_birth >= scn->scn_phys.scn_max_txg) { count_block(scn, dp->dp_blkstats, bp); return (0); } /* Embedded BP's have phys_birth==0, so we reject them above. */ ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn)); if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) { zio_flags |= ZIO_FLAG_SCRUB; needs_io = B_TRUE; } else { ASSERT3U(scn->scn_phys.scn_func, ==, POOL_SCAN_RESILVER); zio_flags |= ZIO_FLAG_RESILVER; needs_io = B_FALSE; } /* If it's an intent log block, failure is expected. */ if (zb->zb_level == ZB_ZIL_LEVEL) zio_flags |= ZIO_FLAG_SPECULATIVE; for (d = 0; d < BP_GET_NDVAS(bp); d++) { const dva_t *dva = &bp->blk_dva[d]; /* * Keep track of how much data we've examined so that * zpool(1M) status can make useful progress reports. */ scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva); spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva); /* if it's a resilver, this may not be in the target range */ if (!needs_io) needs_io = dsl_scan_need_resilver(spa, dva, psize, phys_birth); } if (needs_io && !zfs_no_scrub_io) { dsl_scan_enqueue(dp, bp, zio_flags, zb); } else { count_block(scn, dp->dp_blkstats, bp); } /* do not relocate this block */ return (0); } static void dsl_scan_scrub_done(zio_t *zio) { spa_t *spa = zio->io_spa; blkptr_t *bp = zio->io_bp; dsl_scan_io_queue_t *queue = zio->io_private; abd_free(zio->io_abd); if (queue == NULL) { mutex_enter(&spa->spa_scrub_lock); ASSERT3U(spa->spa_scrub_inflight, >=, BP_GET_PSIZE(bp)); spa->spa_scrub_inflight -= BP_GET_PSIZE(bp); cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } else { mutex_enter(&queue->q_vd->vdev_scan_io_queue_lock); ASSERT3U(queue->q_inflight_bytes, >=, BP_GET_PSIZE(bp)); queue->q_inflight_bytes -= BP_GET_PSIZE(bp); cv_broadcast(&queue->q_zio_cv); mutex_exit(&queue->q_vd->vdev_scan_io_queue_lock); } if (zio->io_error && (zio->io_error != ECKSUM || !(zio->io_flags & ZIO_FLAG_SPECULATIVE))) { atomic_inc_64(&spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors); } } /* * Given a scanning zio's information, executes the zio. The zio need * not necessarily be only sortable, this function simply executes the * zio, no matter what it is. The optional queue argument allows the * caller to specify that they want per top level vdev IO rate limiting * instead of the legacy global limiting. 
*/ static void scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, const zbookmark_phys_t *zb, dsl_scan_io_queue_t *queue) { spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; size_t size = BP_GET_PSIZE(bp); abd_t *data = abd_alloc_for_io(size, B_FALSE); unsigned int scan_delay = 0; if (queue == NULL) { mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); spa->spa_scrub_inflight += BP_GET_PSIZE(bp); mutex_exit(&spa->spa_scrub_lock); } else { kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock; mutex_enter(q_lock); while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes) cv_wait(&queue->q_zio_cv, q_lock); queue->q_inflight_bytes += BP_GET_PSIZE(bp); mutex_exit(q_lock); } if (zio_flags & ZIO_FLAG_RESILVER) scan_delay = zfs_resilver_delay; else { ASSERT(zio_flags & ZIO_FLAG_SCRUB); scan_delay = zfs_scrub_delay; } if (scan_delay && (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle)) delay(MAX((int)scan_delay, 0)); count_block(dp->dp_scan, dp->dp_blkstats, bp); zio_nowait(zio_read(dp->dp_scan->scn_zio_root, spa, bp, data, size, dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb)); } /* * This is the primary extent sorting algorithm. We balance two parameters: * 1) how many bytes of I/O are in an extent * 2) how well the extent is filled with I/O (as a fraction of its total size) * Since we allow extents to have gaps between their constituent I/Os, it's * possible to have a fairly large extent that contains the same amount of * I/O bytes than a much smaller extent, which just packs the I/O more tightly. * The algorithm sorts based on a score calculated from the extent's size, * the relative fill volume (in %) and a "fill weight" parameter that controls * the split between whether we prefer larger extents or more well populated * extents: * * SCORE = FILL_IN_BYTES + (FILL_IN_PERCENT * FILL_IN_BYTES * FILL_WEIGHT) * * Example: * 1) assume extsz = 64 MiB * 2) assume fill = 32 MiB (extent is half full) * 3) assume fill_weight = 3 * 4) SCORE = 32M + (((32M * 100) / 64M) * 3 * 32M) / 100 * SCORE = 32M + (50 * 3 * 32M) / 100 * SCORE = 32M + (4800M / 100) * SCORE = 32M + 48M * ^ ^ * | +--- final total relative fill-based score * +--------- final total fill-based score * SCORE = 80M * * As can be seen, at fill_ratio=3, the algorithm is slightly biased towards * extents that are more completely filled (in a 3:2 ratio) vs just larger. * Note that as an optimization, we replace multiplication and division by * 100 with bitshifting by 7 (which effecitvely multiplies and divides by 128). */ static int ext_size_compare(const void *x, const void *y) { const range_seg_t *rsa = x, *rsb = y; uint64_t sa = rsa->rs_end - rsa->rs_start, sb = rsb->rs_end - rsb->rs_start; uint64_t score_a, score_b; score_a = rsa->rs_fill + ((((rsa->rs_fill << 7) / sa) * fill_weight * rsa->rs_fill) >> 7); score_b = rsb->rs_fill + ((((rsb->rs_fill << 7) / sb) * fill_weight * rsb->rs_fill) >> 7); if (score_a > score_b) return (-1); if (score_a == score_b) { if (rsa->rs_start < rsb->rs_start) return (-1); if (rsa->rs_start == rsb->rs_start) return (0); return (1); } return (1); } /* * Comparator for the q_sios_by_addr tree. Sorting is simply performed * based on LBA-order (from lowest to highest). 
*/ static int io_addr_compare(const void *x, const void *y) { const scan_io_t *a = x, *b = y; if (a->sio_offset < b->sio_offset) return (-1); if (a->sio_offset == b->sio_offset) return (0); return (1); } /* IO queues are created on demand when they are needed. */ static dsl_scan_io_queue_t * scan_io_queue_create(vdev_t *vd) { dsl_scan_t *scn = vd->vdev_spa->spa_dsl_pool->dp_scan; dsl_scan_io_queue_t *q = kmem_zalloc(sizeof (*q), KM_SLEEP); q->q_scn = scn; q->q_vd = vd; cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL); q->q_exts_by_addr = range_tree_create_impl(&rt_avl_ops, &q->q_exts_by_size, ext_size_compare, zfs_scan_max_ext_gap); avl_create(&q->q_sios_by_addr, io_addr_compare, sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_addr_node)); return (q); } /* * Destroys a scan queue and all segments and scan_io_t's contained in it. * No further execution of I/O occurs, anything pending in the queue is * simply freed without being executed. */ void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue) { dsl_scan_t *scn = queue->q_scn; scan_io_t *sio; void *cookie = NULL; int64_t bytes_dequeued = 0; ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); while ((sio = avl_destroy_nodes(&queue->q_sios_by_addr, &cookie)) != NULL) { ASSERT(range_tree_contains(queue->q_exts_by_addr, sio->sio_offset, sio->sio_asize)); bytes_dequeued += sio->sio_asize; kmem_free(sio, sizeof (*sio)); } atomic_add_64(&scn->scn_bytes_pending, -bytes_dequeued); range_tree_vacate(queue->q_exts_by_addr, NULL, queue); range_tree_destroy(queue->q_exts_by_addr); avl_destroy(&queue->q_sios_by_addr); cv_destroy(&queue->q_zio_cv); kmem_free(queue, sizeof (*queue)); } /* * Properly transfers a dsl_scan_queue_t from `svd' to `tvd'. This is * called on behalf of vdev_top_transfer when creating or destroying * a mirror vdev due to zpool attach/detach. */ void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd) { mutex_enter(&svd->vdev_scan_io_queue_lock); mutex_enter(&tvd->vdev_scan_io_queue_lock); VERIFY3P(tvd->vdev_scan_io_queue, ==, NULL); tvd->vdev_scan_io_queue = svd->vdev_scan_io_queue; svd->vdev_scan_io_queue = NULL; if (tvd->vdev_scan_io_queue != NULL) tvd->vdev_scan_io_queue->q_vd = tvd; mutex_exit(&tvd->vdev_scan_io_queue_lock); mutex_exit(&svd->vdev_scan_io_queue_lock); } static void scan_io_queues_destroy(dsl_scan_t *scn) { vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev; for (uint64_t i = 0; i < rvd->vdev_children; i++) { vdev_t *tvd = rvd->vdev_child[i]; mutex_enter(&tvd->vdev_scan_io_queue_lock); if (tvd->vdev_scan_io_queue != NULL) dsl_scan_io_queue_destroy(tvd->vdev_scan_io_queue); tvd->vdev_scan_io_queue = NULL; mutex_exit(&tvd->vdev_scan_io_queue_lock); } } static void dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i) { dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; vdev_t *vdev; kmutex_t *q_lock; dsl_scan_io_queue_t *queue; scan_io_t srch, *sio; avl_index_t idx; uint64_t start, size; vdev = vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[dva_i])); ASSERT(vdev != NULL); q_lock = &vdev->vdev_scan_io_queue_lock; queue = vdev->vdev_scan_io_queue; mutex_enter(q_lock); if (queue == NULL) { mutex_exit(q_lock); return; } bp2sio(bp, &srch, dva_i); start = srch.sio_offset; size = srch.sio_asize; /* * We can find the zio in two states: * 1) Cold, just sitting in the queue of zio's to be issued at * some point in the future. 
In this case, all we do is * remove the zio from the q_sios_by_addr tree, decrement * its data volume from the containing range_seg_t and * resort the q_exts_by_size tree to reflect that the * range_seg_t has lost some of its 'fill'. We don't shorten * the range_seg_t - this is usually rare enough not to be * worth the extra hassle of trying keep track of precise * extent boundaries. * 2) Hot, where the zio is currently in-flight in * dsl_scan_issue_ios. In this case, we can't simply * reach in and stop the in-flight zio's, so we instead * block the caller. Eventually, dsl_scan_issue_ios will * be done with issuing the zio's it gathered and will * signal us. */ sio = avl_find(&queue->q_sios_by_addr, &srch, &idx); if (sio != NULL) { int64_t asize = sio->sio_asize; blkptr_t tmpbp; /* Got it while it was cold in the queue */ ASSERT3U(start, ==, sio->sio_offset); ASSERT3U(size, ==, asize); avl_remove(&queue->q_sios_by_addr, sio); ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size)); range_tree_remove_fill(queue->q_exts_by_addr, start, size); /* * We only update scn_bytes_pending in the cold path, * otherwise it will already have been accounted for as * part of the zio's execution. */ atomic_add_64(&scn->scn_bytes_pending, -asize); /* count the block as though we issued it */ sio2bp(sio, &tmpbp, dva_i); count_block(scn, dp->dp_blkstats, &tmpbp); kmem_free(sio, sizeof (*sio)); } mutex_exit(q_lock); } /* * Callback invoked when a zio_free() zio is executing. This needs to be * intercepted to prevent the zio from deallocating a particular portion * of disk space and it then getting reallocated and written to, while we * still have it queued up for processing. */ void dsl_scan_freed(spa_t *spa, const blkptr_t *bp) { dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(scn != NULL); if (!dsl_scan_is_running(scn)) return; for (int i = 0; i < BP_GET_NDVAS(bp); i++) dsl_scan_freed_dva(spa, bp, i); } Index: projects/clang900-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- projects/clang900-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (revision 352536) +++ projects/clang900-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (revision 352537) @@ -1,6021 +1,6021 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Nexenta Systems, Inc. 
*/ /* Portions Copyright 2007 Jeremy Teo */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Programming rules. * * Each vnode op performs some logical unit of work. To do this, the ZPL must * properly lock its in-core state, create a DMU transaction, do the work, * record this work in the intent log (ZIL), commit the DMU transaction, * and wait for the intent log to commit if it is a synchronous operation. * Moreover, the vnode ops must work in both normal and log replay context. * The ordering of events is important to avoid deadlocks and references * to freed memory. The example below illustrates the following Big Rules: * * (1) A check must be made in each zfs thread for a mounted file system. * This is done avoiding races using ZFS_ENTER(zfsvfs). * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros * can return EIO from the calling function. * * (2) VN_RELE() should always be the last thing except for zil_commit() * (if necessary) and ZFS_EXIT(). This is for 3 reasons: * First, if it's the last reference, the vnode/znode * can be freed, so the zp may point to freed memory. Second, the last * reference will call zfs_zinactive(), which may induce a lot of work -- * pushing cached pages (which acquires range locks) and syncing out * cached atime changes. Third, zfs_zinactive() may require a new tx, * which could deadlock the system if you were already holding one. * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). * * (3) All range locks must be grabbed before calling dmu_tx_assign(), * as they can span dmu_tx_assign() calls. * * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to * dmu_tx_assign(). This is critical because we don't want to block * while holding locks. * * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This * reduces lock contention and CPU usage when we must wait (note that if * throughput is constrained by the storage, nearly every transaction * must wait). * * Note, in particular, that if a lock is sometimes acquired before * the tx assigns, and sometimes after (e.g. z_lock), then failing * to use a non-blocking assign can deadlock the system. The scenario: * * Thread A has grabbed a lock before calling dmu_tx_assign(). * Thread B is in an already-assigned tx, and blocks for this lock. * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() * forever, because the previous txg can't quiesce until B's tx commits. * * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, * then drop all locks, call dmu_tx_wait(), and try again. On subsequent * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, * to indicate that this operation has already called dmu_tx_wait(). * This will ensure that we don't retry forever, waiting a short bit * each time. * * (5) If the operation succeeded, generate the intent log entry for it * before dropping locks. This ensures that the ordering of events * in the intent log matches the order in which they actually occurred. 
* During ZIL replay the zfs_log_* functions will update the sequence * number to indicate the zil transaction has replayed. * * (6) At the end of each vnode op, the DMU tx must always commit, * regardless of whether there were any errors. * * (7) After dropping all locks, invoke zil_commit(zilog, foid) * to ensure that synchronous semantics are provided when necessary. * * In general, this is how things should be ordered in each vnode op: * * ZFS_ENTER(zfsvfs); // exit if unmounted * top: * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) * rw_enter(...); // grab any other locks you need * tx = dmu_tx_create(...); // get DMU tx * dmu_tx_hold_*(); // hold each object you might modify * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); * if (error) { * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry * VN_RELE(...); // release held vnodes * if (error == ERESTART) { * waited = B_TRUE; * dmu_tx_wait(tx); * dmu_tx_abort(tx); * goto top; * } * dmu_tx_abort(tx); // abort DMU tx * ZFS_EXIT(zfsvfs); // finished in zfs * return (error); // really out of space * } * error = do_real_work(); // do whatever this VOP does * if (error == 0) * zfs_log_*(...); // on success, make ZIL entry * dmu_tx_commit(tx); // commit DMU tx -- error or not * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry * VN_RELE(...); // release held vnodes * zil_commit(zilog, foid); // synchronous when necessary * ZFS_EXIT(zfsvfs); // finished in zfs * return (error); // done, report error */ /* ARGSUSED */ static int zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(*vpp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && ((flag & FAPPEND) == 0)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && ZTOV(zp)->v_type == VREG && !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { if (fs_vscan(*vpp, cr, 0) != 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EACCES)); } } /* Keep a count of the synchronous opens in the znode */ if (flag & (FSYNC | FDSYNC)) atomic_inc_32(&zp->z_sync_cnt); ZFS_EXIT(zfsvfs); return (0); } /* ARGSUSED */ static int zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; /* * Clean up any locks held by this process on the vp. */ cleanlocks(vp, ddi_get_pid(), 0); cleanshares(vp, ddi_get_pid()); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* Decrement the synchronous opens in the znode */ if ((flag & (FSYNC | FDSYNC)) && (count == 1)) atomic_dec_32(&zp->z_sync_cnt); if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && ZTOV(zp)->v_type == VREG && !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) VERIFY(fs_vscan(vp, cr, 1) == 0); ZFS_EXIT(zfsvfs); return (0); } /* * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
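 *
 * Behaviour sketch (informal): on success *off is advanced to the start
 * of the next hole or data region at or after the original offset;
 * ENXIO is returned when the offset is at or beyond EOF, or when
 * dmu_offset_next() reports ESRCH (nothing of the requested kind left).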
*/ static int zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) { znode_t *zp = VTOZ(vp); uint64_t noff = (uint64_t)*off; /* new offset */ uint64_t file_sz; int error; boolean_t hole; file_sz = zp->z_size; if (noff >= file_sz) { return (SET_ERROR(ENXIO)); } if (cmd == _FIO_SEEK_HOLE) hole = B_TRUE; else hole = B_FALSE; error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); if (error == ESRCH) return (SET_ERROR(ENXIO)); /* * We could find a hole that begins after the logical end-of-file, * because dmu_offset_next() only works on whole blocks. If the * EOF falls mid-block, then indicate that the "virtual hole" * at the end of the file begins at the logical EOF, rather than * at the end of the last block. */ if (noff > file_sz) { ASSERT(hole); noff = file_sz; } if (noff < *off) return (error); *off = noff; return (error); } /* ARGSUSED */ static int zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, int *rvalp, caller_context_t *ct) { offset_t off; offset_t ndata; dmu_object_info_t doi; int error; zfsvfs_t *zfsvfs; znode_t *zp; switch (com) { case _FIOFFS: { return (0); /* * The following two ioctls are used by bfu. Faking out, * necessary to avoid bfu errors. */ } case _FIOGDIO: case _FIOSDIO: { return (0); } case _FIO_SEEK_DATA: case _FIO_SEEK_HOLE: { #ifdef illumos if (ddi_copyin((void *)data, &off, sizeof (off), flag)) return (SET_ERROR(EFAULT)); #else off = *(offset_t *)data; #endif zp = VTOZ(vp); zfsvfs = zp->z_zfsvfs; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* offset parameter is in/out */ error = zfs_holey(vp, com, &off); ZFS_EXIT(zfsvfs); if (error) return (error); #ifdef illumos if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) return (SET_ERROR(EFAULT)); #else *(offset_t *)data = off; #endif return (0); } #ifdef illumos case _FIO_COUNT_FILLED: { /* * _FIO_COUNT_FILLED adds a new ioctl command which * exposes the number of filled blocks in a * ZFS object. */ zp = VTOZ(vp); zfsvfs = zp->z_zfsvfs; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* * Wait for all dirty blocks for this object * to get synced out to disk, and the DMU info * updated. */ error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); if (error) { ZFS_EXIT(zfsvfs); return (error); } /* * Retrieve fill count from DMU object. */ error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); if (error) { ZFS_EXIT(zfsvfs); return (error); } ndata = doi.doi_fill_count; ZFS_EXIT(zfsvfs); if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) return (SET_ERROR(EFAULT)); return (0); } #endif } return (SET_ERROR(ENOTTY)); } static vm_page_t page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) { vm_object_t obj; vm_page_t pp; int64_t end; /* * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE * aligned boundaries, if the range is not aligned. As a result a * DEV_BSIZE subrange with partially dirty data may get marked as clean. * It may happen that all DEV_BSIZE subranges are marked clean and thus * the whole page would be considred clean despite have some dirty data. * For this reason we should shrink the range to DEV_BSIZE aligned * boundaries before calling vm_page_clear_dirty. 
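 *
 * Worked example (assuming DEV_BSIZE == 512): for off = 100 and
 * nbytes = 1000 the caller's range is [100, 1100); after shrinking,
 * off = roundup2(100, 512) = 512 and end = rounddown2(1100, 512) = 1024,
 * so only the fully covered [512, 1024) subrange (512 bytes) is cleared.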
*/ end = rounddown2(off + nbytes, DEV_BSIZE); off = roundup2(off, DEV_BSIZE); nbytes = end - off; obj = vp->v_object; zfs_vmobject_assert_wlocked(obj); vm_page_grab_valid(&pp, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); if (pp != NULL) { ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); vm_object_pip_add(obj, 1); pmap_remove_write(pp); if (nbytes != 0) vm_page_clear_dirty(pp, off, nbytes); } return (pp); } static void page_unbusy(vm_page_t pp) { vm_page_sunbusy(pp); vm_object_pip_wakeup(pp->object); } static vm_page_t page_wire(vnode_t *vp, int64_t start) { vm_object_t obj; vm_page_t m; obj = vp->v_object; zfs_vmobject_assert_wlocked(obj); vm_page_grab_valid(&m, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOBUSY); return (m); } static void page_unwire(vm_page_t pp) { vm_page_unwire(pp, PQ_ACTIVE); } /* * When a file is memory mapped, we must keep the IO data synchronized * between the DMU cache and the memory mapped pages. What this means: * * On Write: If we find a memory mapped page, we write to *both* * the page and the dmu buffer. */ static void update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, int segflg, dmu_tx_t *tx) { vm_object_t obj; struct sf_buf *sf; caddr_t va; int off; ASSERT(segflg != UIO_NOCOPY); ASSERT(vp->v_mount != NULL); obj = vp->v_object; ASSERT(obj != NULL); off = start & PAGEOFFSET; zfs_vmobject_wlock(obj); vm_object_pip_add(obj, 1); for (start &= PAGEMASK; len > 0; start += PAGESIZE) { vm_page_t pp; int nbytes = imin(PAGESIZE - off, len); if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); (void) dmu_read(os, oid, start+off, nbytes, va+off, DMU_READ_PREFETCH);; zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unbusy(pp); } len -= nbytes; off = 0; } vm_object_pip_wakeup(obj); zfs_vmobject_wunlock(obj); } /* * Read with UIO_NOCOPY flag means that sendfile(2) requests * ZFS to populate a range of page cache pages with data. * * NOTE: this function could be optimized to pre-allocate * all pages in advance, drain exclusive busy on all of them, * map them into contiguous KVA region and populate them * in one single dmu_read() call. 
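 *
 * As written below, the simple path is taken instead: one vm_page_grab()
 * and, for pages that are not yet valid, one dmu_read() per PAGESIZE
 * chunk of the requested range.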
*/ static int mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) { znode_t *zp = VTOZ(vp); objset_t *os = zp->z_zfsvfs->z_os; struct sf_buf *sf; vm_object_t obj; vm_page_t pp; int64_t start; caddr_t va; int len = nbytes; int off; int error = 0; ASSERT(uio->uio_segflg == UIO_NOCOPY); ASSERT(vp->v_mount != NULL); obj = vp->v_object; ASSERT(obj != NULL); ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); zfs_vmobject_wlock(obj); for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { int bytes = MIN(PAGESIZE, len); pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); if (pp->valid == 0) { zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); error = dmu_read(os, zp->z_id, start, bytes, va, DMU_READ_PREFETCH); if (bytes != PAGESIZE && error == 0) bzero(va + bytes, PAGESIZE - bytes); zfs_unmap_page(sf); zfs_vmobject_wlock(obj); vm_page_sunbusy(pp); if (error) { if (!vm_page_busied(pp) && !vm_page_wired(pp) && pp->valid == 0) vm_page_free(pp); } else { pp->valid = VM_PAGE_BITS_ALL; vm_page_lock(pp); vm_page_activate(pp); vm_page_unlock(pp); } } else { ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); vm_page_sunbusy(pp); } if (error) break; uio->uio_resid -= bytes; uio->uio_offset += bytes; len -= bytes; } zfs_vmobject_wunlock(obj); return (error); } /* * When a file is memory mapped, we must keep the IO data synchronized * between the DMU cache and the memory mapped pages. What this means: * * On Read: We "read" preferentially from memory mapped pages, * else we default from the dmu buffer. * * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when * the file is memory mapped. */ static int mappedread(vnode_t *vp, int nbytes, uio_t *uio) { znode_t *zp = VTOZ(vp); vm_object_t obj; int64_t start; caddr_t va; int len = nbytes; int off; int error = 0; ASSERT(vp->v_mount != NULL); obj = vp->v_object; ASSERT(obj != NULL); start = uio->uio_loffset; off = start & PAGEOFFSET; zfs_vmobject_wlock(obj); for (start &= PAGEMASK; len > 0; start += PAGESIZE) { vm_page_t pp; uint64_t bytes = MIN(PAGESIZE - off, len); if (pp = page_wire(vp, start)) { struct sf_buf *sf; caddr_t va; zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); #ifdef illumos error = uiomove(va + off, bytes, UIO_READ, uio); #else error = vn_io_fault_uiomove(va + off, bytes, uio); #endif zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unwire(pp); } else { zfs_vmobject_wunlock(obj); error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, bytes); zfs_vmobject_wlock(obj); } len -= bytes; off = 0; if (error) break; } zfs_vmobject_wunlock(obj); return (error); } offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ /* * Read bytes from specified file into supplied buffer. * * IN: vp - vnode of file to be read from. * uio - structure supplying read location, range info, * and return buffer. * ioflag - SYNC flags; used to provide FRSYNC semantics. * cr - credentials of caller. * ct - caller context * * OUT: uio - updated offset and range, buffer filled. * * RETURN: 0 on success, error code on failure. 
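 *
 * Note: the read is processed in chunks of at most zfs_read_chunk_size
 * (1 MiB by default); each chunk is served from mapped pages via
 * mappedread()/mappedread_sf() when the vnode has cached data, and
 * straight from the DMU otherwise.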
* * Side Effects: * vp - atime updated if byte count > 0 */ /* ARGSUSED */ static int zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; ssize_t n, nbytes; int error = 0; rl_t *rl; xuio_t *xuio = NULL; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if (zp->z_pflags & ZFS_AV_QUARANTINED) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EACCES)); } /* * Validate file offset */ if (uio->uio_loffset < (offset_t)0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * Fasttrack empty reads */ if (uio->uio_resid == 0) { ZFS_EXIT(zfsvfs); return (0); } /* * Check for mandatory locks */ if (MANDMODE(zp->z_mode)) { if (error = chklock(vp, FREAD, uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { ZFS_EXIT(zfsvfs); return (error); } } /* * If we're in FRSYNC mode, sync out this znode before reading it. */ if (zfsvfs->z_log && (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) zil_commit(zfsvfs->z_log, zp->z_id); /* * Lock the range against changes. */ rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); /* * If we are reading past end-of-file we can skip * to the end; but we might still need to set atime. */ if (uio->uio_loffset >= zp->z_size) { error = 0; goto out; } ASSERT(uio->uio_loffset < zp->z_size); n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); #ifdef illumos if ((uio->uio_extflg == UIO_XUIO) && (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { int nblk; int blksz = zp->z_blksz; uint64_t offset = uio->uio_loffset; xuio = (xuio_t *)uio; if ((ISP2(blksz))) { nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, blksz)) / blksz; } else { ASSERT(offset + n <= blksz); nblk = 1; } (void) dmu_xuio_init(xuio, nblk); if (vn_has_cached_data(vp)) { /* * For simplicity, we always allocate a full buffer * even if we only expect to read a portion of a block. */ while (--nblk >= 0) { (void) dmu_xuio_add(xuio, dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), blksz), 0, blksz); } } } #endif /* illumos */ while (n > 0) { nbytes = MIN(n, zfs_read_chunk_size - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); #ifdef __FreeBSD__ if (uio->uio_segflg == UIO_NOCOPY) error = mappedread_sf(vp, nbytes, uio); else #endif /* __FreeBSD__ */ if (vn_has_cached_data(vp)) { error = mappedread(vp, nbytes, uio); } else { error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, nbytes); } if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) error = SET_ERROR(EIO); break; } n -= nbytes; } out: zfs_range_unlock(rl); ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); return (error); } /* * Write the bytes to a file. * * IN: vp - vnode of file to be written to. * uio - structure supplying write location, range info, * and data buffer. * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is * set if in append mode. * cr - credentials of caller. * ct - caller context (NFS/CIFS fem monitor only) * * OUT: uio - updated offset and range. * * RETURN: 0 on success, error code on failure. 
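 *
 * Note: the write is split into chunks of at most z_max_blksz bytes,
 * and each chunk is assigned to its own DMU transaction, which keeps
 * the intent log records small (see the chunk loop below).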
* * Timestamps: * vp - ctime|mtime updated if byte count > 0 */ /* ARGSUSED */ static int zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); rlim64_t limit = MAXOFFSET_T; ssize_t start_resid = uio->uio_resid; ssize_t tx_bytes; uint64_t end_size; dmu_tx_t *tx; zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog; offset_t woff; ssize_t n, nbytes; rl_t *rl; int max_blksz = zfsvfs->z_max_blksz; int error = 0; arc_buf_t *abuf; iovec_t *aiov = NULL; xuio_t *xuio = NULL; int i_iov = 0; int iovcnt = uio->uio_iovcnt; iovec_t *iovp = uio->uio_iov; int write_eof; int count = 0; sa_bulk_attr_t bulk[4]; uint64_t mtime[2], ctime[2]; /* * Fasttrack empty write */ n = start_resid; if (n == 0) return (0); if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) limit = MAXOFFSET_T; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, &zp->z_size, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); /* * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our * callers might not be able to detect properly that we are read-only, * so check it explicitly here. */ if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EROFS)); } /* * If immutable or not appending then return EPERM. * Intentionally allow ZFS_READONLY through here. * See zfs_zaccess_common() */ if ((zp->z_pflags & ZFS_IMMUTABLE) || ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && (uio->uio_loffset < zp->z_size))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } zilog = zfsvfs->z_log; /* * Validate file offset */ woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; if (woff < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * Check for mandatory locks before calling zfs_range_lock() * in order to prevent a deadlock with locks set via fcntl(). */ if (MANDMODE((mode_t)zp->z_mode) && (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { ZFS_EXIT(zfsvfs); return (error); } #ifdef illumos /* * Pre-fault the pages to ensure slow (eg NFS) pages * don't hold up txg. * Skip this if uio contains loaned arc_buf. */ if ((uio->uio_extflg == UIO_XUIO) && (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) xuio = (xuio_t *)uio; else uio_prefaultpages(MIN(n, max_blksz), uio); #endif /* * If in append mode, set the io offset pointer to eof. */ if (ioflag & FAPPEND) { /* * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. */ rl = zfs_range_lock(zp, 0, n, RL_APPEND); woff = rl->r_off; if (rl->r_len == UINT64_MAX) { /* * We overlocked the file because this write will cause * the file block size to increase. * Note that zp_size cannot change with this lock held. */ woff = zp->z_size; } uio->uio_loffset = woff; } else { /* * Note that if the file block size will change as a result of * this write, then this range lock will lock the entire file * so that we can re-write the block safely. */ rl = zfs_range_lock(zp, woff, n, RL_WRITER); } if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); return (EFBIG); } if (woff >= limit) { zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); return (SET_ERROR(EFBIG)); } if ((woff + n) > limit || woff > (limit - n)) n = limit - woff; /* Will this write extend the file length? 
*/ write_eof = (woff + n > zp->z_size); end_size = MAX(zp->z_size, woff + n); /* * Write the file in reasonable size chunks. Each chunk is written * in a separate transaction; this keeps the intent log records small * and allows us to do more fine-grained space accounting. */ while (n > 0) { abuf = NULL; woff = uio->uio_loffset; if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { if (abuf != NULL) dmu_return_arcbuf(abuf); error = SET_ERROR(EDQUOT); break; } if (xuio && abuf == NULL) { ASSERT(i_iov < iovcnt); aiov = &iovp[i_iov]; abuf = dmu_xuio_arcbuf(xuio, i_iov); dmu_xuio_clear(xuio, i_iov); DTRACE_PROBE3(zfs_cp_write, int, i_iov, iovec_t *, aiov, arc_buf_t *, abuf); ASSERT((aiov->iov_base == abuf->b_data) || ((char *)aiov->iov_base - (char *)abuf->b_data + aiov->iov_len == arc_buf_size(abuf))); i_iov++; } else if (abuf == NULL && n >= max_blksz && woff >= zp->z_size && P2PHASE(woff, max_blksz) == 0 && zp->z_blksz == max_blksz) { /* * This write covers a full block. "Borrow" a buffer * from the dmu so that we can fill it before we enter * a transaction. This avoids the possibility of * holding up the transaction if the data copy hangs * up on a pagefault (e.g., from an NFS server mapping). */ size_t cbytes; abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); if (error = uiocopy(abuf->b_data, max_blksz, UIO_WRITE, uio, &cbytes)) { dmu_return_arcbuf(abuf); break; } ASSERT(cbytes == max_blksz); } /* * Start a transaction. */ tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); if (abuf != NULL) dmu_return_arcbuf(abuf); break; } /* * If zfs_range_lock() over-locked we grow the blocksize * and then reduce the lock range. This will only happen * on the first iteration since zfs_range_reduce() will * shrink down r_len to the appropriate size. */ if (rl->r_len == UINT64_MAX) { uint64_t new_blksz; if (zp->z_blksz > max_blksz) { /* * File's blocksize is already larger than the * "recordsize" property. Only let it grow to * the next power of 2. */ ASSERT(!ISP2(zp->z_blksz)); new_blksz = MIN(end_size, 1 << highbit64(zp->z_blksz)); } else { new_blksz = MIN(end_size, max_blksz); } zfs_grow_blocksize(zp, new_blksz, tx); zfs_range_reduce(rl, woff, n); } /* * XXX - should we really limit each write to z_max_blksz? * Perhaps we should use SPA_MAXBLOCKSIZE chunks? */ nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); if (woff + nbytes > zp->z_size) vnode_pager_setsize(vp, woff + nbytes); if (abuf == NULL) { tx_bytes = uio->uio_resid; error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, nbytes, tx); tx_bytes -= uio->uio_resid; } else { tx_bytes = nbytes; ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); /* * If this is not a full block write, but we are * extending the file past EOF and this data starts * block-aligned, use assign_arcbuf(). Otherwise, * write via dmu_write(). 
*/ if (tx_bytes < max_blksz && (!write_eof || aiov->iov_base != abuf->b_data)) { ASSERT(xuio); dmu_write(zfsvfs->z_os, zp->z_id, woff, aiov->iov_len, aiov->iov_base, tx); dmu_return_arcbuf(abuf); xuio_stat_wbuf_copied(); } else { ASSERT(xuio || tx_bytes == max_blksz); dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); } if (tx_bytes && vn_has_cached_data(vp)) { update_pages(vp, woff, tx_bytes, zfsvfs->z_os, zp->z_id, uio->uio_segflg, tx); } /* * If we made no progress, we're done. If we made even * partial progress, update the znode and ZIL accordingly. */ if (tx_bytes == 0) { (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), (void *)&zp->z_size, sizeof (uint64_t), tx); dmu_tx_commit(tx); ASSERT(error != 0); break; } /* * Clear Set-UID/Set-GID bits on successful write if not * privileged and at least one of the excute bits is set. * * It would be nice to to this after all writes have * been done, but that would still expose the ISUID/ISGID * to another app after the partial write is committed. * * Note: we don't call zfs_fuid_map_id() here because * user 0 is not an ephemeral uid. */ mutex_enter(&zp->z_acl_lock); if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 && (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && secpolicy_vnode_setid_retain(vp, cr, (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { uint64_t newmode; zp->z_mode &= ~(S_ISUID | S_ISGID); newmode = zp->z_mode; (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), (void *)&newmode, sizeof (uint64_t), tx); } mutex_exit(&zp->z_acl_lock); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); /* * Update the file size (zp_size) if it has changed; * account for possible concurrent updates. */ while ((end_size = zp->z_size) < uio->uio_loffset) { (void) atomic_cas_64(&zp->z_size, end_size, uio->uio_loffset); #ifdef illumos ASSERT(error == 0); #else ASSERT(error == 0 || error == EFAULT); #endif } /* * If we are replaying and eof is non zero then force * the file size to the specified eof. Note, there's no * concurrency during replay. */ if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) zp->z_size = zfsvfs->z_replay_eof; if (error == 0) error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); else (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); dmu_tx_commit(tx); if (error != 0) break; ASSERT(tx_bytes == nbytes); n -= nbytes; #ifdef illumos if (!xuio && n > 0) uio_prefaultpages(MIN(n, max_blksz), uio); #endif } zfs_range_unlock(rl); /* * If we're in replay mode, or we made no progress, return error. * Otherwise, it's at least a partial write, so it's successful. */ if (zfsvfs->z_replay || uio->uio_resid == start_resid) { ZFS_EXIT(zfsvfs); return (error); } #ifdef __FreeBSD__ /* * EFAULT means that at least one page of the source buffer was not * available. VFS will re-try remaining I/O upon this error. */ if (error == EFAULT) { ZFS_EXIT(zfsvfs); return (error); } #endif if (ioflag & (FSYNC | FDSYNC) || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); ZFS_EXIT(zfsvfs); return (0); } /* ARGSUSED */ void zfs_get_done(zgd_t *zgd, int error) { znode_t *zp = zgd->zgd_private; objset_t *os = zp->z_zfsvfs->z_os; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); zfs_range_unlock(zgd->zgd_rl); /* * Release the vnode asynchronously as we currently have the * txg stopped from syncing. 
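 *
 * (A synchronous VN_RELE() could drop the last reference and call
 * zfs_zinactive(), which may need a new transaction and would then
 * wait on the very txg this callback is holding open.)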
*/ VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); kmem_free(zgd, sizeof (zgd_t)); } #ifdef DEBUG static int zil_fault_io = 0; #endif /* * Get data to generate a TX_WRITE intent log record. */ int zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) { zfsvfs_t *zfsvfs = arg; objset_t *os = zfsvfs->z_os; znode_t *zp; uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; uint64_t size = lr->lr_length; dmu_buf_t *db; zgd_t *zgd; int error = 0; ASSERT3P(lwb, !=, NULL); ASSERT3P(zio, !=, NULL); ASSERT3U(size, !=, 0); /* * Nothing to do if the file has been removed */ if (zfs_zget(zfsvfs, object, &zp) != 0) return (SET_ERROR(ENOENT)); if (zp->z_unlinked) { /* * Release the vnode asynchronously as we currently have the * txg stopped from syncing. */ VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); return (SET_ERROR(ENOENT)); } zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_lwb = lwb; zgd->zgd_private = zp; /* * Write records come in two flavors: immediate and indirect. * For small writes it's cheaper to store the data with the * log record (immediate); for large writes it's cheaper to * sync the data and get a pointer to it (indirect) so that * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = SET_ERROR(ENOENT); } else { error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); } ASSERT(error == 0 || error == ENOENT); } else { /* indirect write */ /* * Have to lock the whole block to ensure when it's * written out and its checksum is being calculated * that no one can change the data. We need to re-check * blocksize after we get the lock in case it's changed! */ for (;;) { uint64_t blkoff; size = zp->z_blksz; blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; offset -= blkoff; zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); if (zp->z_blksz == size) break; offset += blkoff; zfs_range_unlock(zgd->zgd_rl); } /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) error = SET_ERROR(ENOENT); #ifdef DEBUG if (zil_fault_io) { error = SET_ERROR(EIO); zil_fault_io = 0; } #endif if (error == 0) error = dmu_buf_hold(os, object, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) { blkptr_t *bp = &lr->lr_blkptr; zgd->zgd_db = db; zgd->zgd_bp = bp; ASSERT(db->db_offset == offset); ASSERT(db->db_size == size); error = dmu_sync(zio, lr->lr_common.lrc_txg, zfs_get_done, zgd); ASSERT(error || lr->lr_length <= size); /* * On success, we need to wait for the write I/O * initiated by dmu_sync() to complete before we can * release this dbuf. We will finish everything up * in the zfs_get_done() callback. */ if (error == 0) return (0); if (error == EALREADY) { lr->lr_common.lrc_txtype = TX_WRITE2; /* * TX_WRITE2 relies on the data previously * written by the TX_WRITE that caused * EALREADY. We zero out the BP because * it is the old, currently-on-disk BP. 
*/ zgd->zgd_bp = NULL; BP_ZERO(bp); error = 0; } } } zfs_get_done(zgd, error); return (error); } /*ARGSUSED*/ static int zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if (flag & V_ACE_MASK) error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); else error = zfs_zaccess_rwx(zp, mode, flag, cr); ZFS_EXIT(zfsvfs); return (error); } static int zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) { int error; *vpp = arg; error = vn_lock(*vpp, lkflags); if (error != 0) vrele(*vpp); return (error); } static int zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags) { znode_t *zdp = VTOZ(dvp); zfsvfs_t *zfsvfs = zdp->z_zfsvfs; int error; int ltype; ASSERT_VOP_LOCKED(dvp, __func__); #ifdef DIAGNOSTIC if ((zdp->z_pflags & ZFS_XATTR) == 0) VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); #endif if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { ASSERT3P(dvp, ==, vp); vref(dvp); ltype = lkflags & LK_TYPE_MASK; if (ltype != VOP_ISLOCKED(dvp)) { if (ltype == LK_EXCLUSIVE) vn_lock(dvp, LK_UPGRADE | LK_RETRY); else /* if (ltype == LK_SHARED) */ vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); /* * Relock for the "." case could leave us with * reclaimed vnode. */ if (dvp->v_iflag & VI_DOOMED) { vrele(dvp); return (SET_ERROR(ENOENT)); } } return (0); } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { /* * Note that in this case, dvp is the child vnode, and we * are looking up the parent vnode - exactly reverse from * normal operation. Unlocking dvp requires some rather * tricky unlock/relock dance to prevent mp from being freed; * use vn_vget_ino_gen() which takes care of all that. * * XXX Note that there is a time window when both vnodes are * unlocked. It is possible, although highly unlikely, that * during that window the parent-child relationship between * the vnodes may change, for example, get reversed. * In that case we would have a wrong lock order for the vnodes. * All other filesystems seem to ignore this problem, so we * do the same here. * A potential solution could be implemented as follows: * - using LK_NOWAIT when locking the second vnode and retrying * if necessary * - checking that the parent-child relationship still holds * after locking both vnodes and retrying if it doesn't */ error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); return (error); } else { error = vn_lock(vp, lkflags); if (error != 0) vrele(vp); return (error); } } /* * Lookup an entry in a directory, or an extended attribute directory. * If it exists, return a held vnode reference for it. * * IN: dvp - vnode of directory to search. * nm - name of entry to lookup. * pnp - full pathname to lookup [UNUSED]. * flags - LOOKUP_XATTR set if looking for an attribute. * rdir - root directory vnode [UNUSED]. * cr - credentials of caller. * ct - caller context * * OUT: vpp - vnode of located entry, NULL if not found. * * RETURN: 0 on success, error code on failure. * * Timestamps: * NA */ /* ARGSUSED */ static int zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, int nameiop, cred_t *cr, kthread_t *td, int flags) { znode_t *zdp = VTOZ(dvp); znode_t *zp; zfsvfs_t *zfsvfs = zdp->z_zfsvfs; int error = 0; /* * Fast path lookup, however we must skip DNLC lookup * for case folding or normalizing lookups because the * DNLC code only stores the passed in name. 
This means * creating 'a' and removing 'A' on a case insensitive * file system would work, but DNLC still thinks 'a' * exists and won't let you create it again on the next * pass through fast path. */ if (!(flags & LOOKUP_XATTR)) { if (dvp->v_type != VDIR) { return (SET_ERROR(ENOTDIR)); } else if (zdp->z_sa_hdl == NULL) { return (SET_ERROR(EIO)); } } DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zdp); *vpp = NULL; if (flags & LOOKUP_XATTR) { #ifdef TODO /* * If the xattr property is off, refuse the lookup request. */ if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } #endif /* * We don't allow recursive attributes.. * Maybe someday we will. */ if (zdp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { ZFS_EXIT(zfsvfs); return (error); } /* * Do we have permission to get into attribute directory? */ if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, B_FALSE, cr)) { vrele(*vpp); *vpp = NULL; } ZFS_EXIT(zfsvfs); return (error); } /* * Check accessibility of directory. */ if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { ZFS_EXIT(zfsvfs); return (error); } if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } /* * First handle the special cases. */ if ((cnp->cn_flags & ISDOTDOT) != 0) { /* * If we are a snapshot mounted under .zfs, return * the vp for the snapshot directory. */ if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { struct componentname cn; vnode_t *zfsctl_vp; int ltype; ZFS_EXIT(zfsvfs); ltype = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp, 0); error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, &zfsctl_vp); if (error == 0) { cn.cn_nameptr = "snapshot"; cn.cn_namelen = strlen(cn.cn_nameptr); cn.cn_nameiop = cnp->cn_nameiop; cn.cn_flags = cnp->cn_flags & ~ISDOTDOT; cn.cn_lkflags = cnp->cn_lkflags; error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); vput(zfsctl_vp); } vn_lock(dvp, ltype | LK_RETRY); return (error); } } if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { ZFS_EXIT(zfsvfs); if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) return (SET_ERROR(ENOTSUP)); error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); return (error); } /* * The loop is retry the lookup if the parent-child relationship * changes during the dot-dot locking complexities. */ for (;;) { uint64_t parent; error = zfs_dirlook(zdp, nm, &zp); if (error == 0) *vpp = ZTOV(zp); ZFS_EXIT(zfsvfs); if (error != 0) break; error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); if (error != 0) { /* * If we've got a locking error, then the vnode * got reclaimed because of a force unmount. * We never enter doomed vnodes into the name cache. */ *vpp = NULL; return (error); } if ((cnp->cn_flags & ISDOTDOT) == 0) break; ZFS_ENTER(zfsvfs); if (zdp->z_sa_hdl == NULL) { error = SET_ERROR(EIO); } else { error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent)); } if (error != 0) { ZFS_EXIT(zfsvfs); vput(ZTOV(zp)); break; } if (zp->z_id == parent) { ZFS_EXIT(zfsvfs); break; } vput(ZTOV(zp)); } out: if (error != 0) *vpp = NULL; /* Translate errors and add SAVENAME when needed. 
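 * For CREATE and RENAME lookups of a missing last component, ENOENT is
 * turned into EJUSTRETURN so the caller can go on to create the entry;
 * SAVENAME asks the caller to keep the pathname buffer around for that
 * upcoming operation.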
*/ if (cnp->cn_flags & ISLASTCN) { switch (nameiop) { case CREATE: case RENAME: if (error == ENOENT) { error = EJUSTRETURN; cnp->cn_flags |= SAVENAME; break; } /* FALLTHROUGH */ case DELETE: if (error == 0) cnp->cn_flags |= SAVENAME; break; } } /* Insert name into cache (as non-existent) if appropriate. */ if (zfsvfs->z_use_namecache && error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) cache_enter(dvp, NULL, cnp); /* Insert name into cache if appropriate. */ if (zfsvfs->z_use_namecache && error == 0 && (cnp->cn_flags & MAKEENTRY)) { if (!(cnp->cn_flags & ISLASTCN) || (nameiop != DELETE && nameiop != RENAME)) { cache_enter(dvp, *vpp, cnp); } } return (error); } /* * Attempt to create a new entry in a directory. If the entry * already exists, truncate the file if permissible, else return * an error. Return the vp of the created or trunc'd file. * * IN: dvp - vnode of directory to put new file entry in. * name - name of new file entry. * vap - attributes of new file. * excl - flag indicating exclusive or non-exclusive mode. * mode - mode to open file with. * cr - credentials of caller. * flag - large file flag [UNUSED]. * ct - caller context * vsecp - ACL to be set * * OUT: vpp - vnode of created or trunc'd entry. * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated if new entry created * vp - ctime|mtime always, atime if new */ /* ARGSUSED */ static int zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, vnode_t **vpp, cred_t *cr, kthread_t *td) { znode_t *zp, *dzp = VTOZ(dvp); zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; objset_t *os; dmu_tx_t *tx; int error; ksid_t *ksid; uid_t uid; gid_t gid = crgetgid(cr); zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; void *vsecp = NULL; int flag = 0; uint64_t txtype; /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version */ ksid = crgetsid(cr, KSID_OWNER); if (ksid) uid = ksid_getid(ksid); else uid = crgetuid(cr); if (zfsvfs->z_use_fuids == B_FALSE && (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); os = zfsvfs->z_os; zilog = zfsvfs->z_log; if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (vap->va_mask & AT_XVATTR) { if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, crgetuid(cr), cr, vap->va_type)) != 0) { ZFS_EXIT(zfsvfs); return (error); } } *vpp = NULL; if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) vap->va_mode &= ~S_ISVTX; error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); if (error) { ZFS_EXIT(zfsvfs); return (error); } ASSERT3P(zp, ==, NULL); /* * Create a new file object and update the directory * to reference it. */ if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { goto out; } /* * We only support the creation of regular files in * extended attribute directories. 
*/ if ((dzp->z_pflags & ZFS_XATTR) && (vap->va_type != VREG)) { error = SET_ERROR(EINVAL); goto out; } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, &acl_ids)) != 0) goto out; if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); error = SET_ERROR(EDQUOT); goto out; } getnewvnode_reserve(1); tx = dmu_tx_create(os); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } error = dmu_tx_assign(tx, TXG_WAIT); if (error) { zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); getnewvnode_drop_reserve(); ZFS_EXIT(zfsvfs); return (error); } zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); (void) zfs_link_create(dzp, name, zp, tx, ZNEW); txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); zfs_log_create(zilog, tx, txtype, dzp, zp, name, vsecp, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); getnewvnode_drop_reserve(); out: if (error == 0) { *vpp = ZTOV(zp); } if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Remove an entry from a directory. * * IN: dvp - vnode of directory to remove entry from. * name - name of entry to remove. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime * vp - ctime (if nlink > 0) */ /*ARGSUSED*/ static int zfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) { znode_t *dzp = VTOZ(dvp); znode_t *zp = VTOZ(vp); znode_t *xzp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; uint64_t acl_obj, xattr_obj; uint64_t obj = 0; dmu_tx_t *tx; boolean_t unlinked, toobig = FALSE; uint64_t txtype; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); ZFS_VERIFY_ZP(zp); zilog = zfsvfs->z_log; zp = VTOZ(vp); xattr_obj = 0; xzp = NULL; if (error = zfs_zaccess_delete(dzp, zp, cr)) { goto out; } /* * Need to use rmdir for removing directories. */ if (vp->v_type == VDIR) { error = SET_ERROR(EPERM); goto out; } vnevent_remove(vp, dvp, name, ct); obj = zp->z_id; /* are there any extended attributes? */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zfsvfs, xattr_obj, &xzp); ASSERT0(error); } /* * We may delete the znode now, or we may put it in the unlinked set; * it depends on whether we're the last link, and on whether there are * other holds on the vnode. So we dmu_tx_hold() the right things to * allow for either case. */ tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); if (xzp) { dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } /* charge as an update -- would be nice not to charge at all */ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); /* * Mark this transaction as typically resulting in a net free of space */ dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } /* * Remove the directory entry. 
*/ error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); if (error) { dmu_tx_commit(tx); goto out; } if (unlinked) { zfs_unlinked_add(zp, tx); vp->v_vflag |= VV_NOSYNC; } txtype = TX_REMOVE; zfs_log_remove(zilog, tx, txtype, dzp, name, obj); dmu_tx_commit(tx); out: if (xzp) vrele(ZTOV(xzp)); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Create a new directory and insert it into dvp using the name * provided. Return a pointer to the inserted directory. * * IN: dvp - vnode of directory to add subdir to. * dirname - name of new directory. * vap - attributes of new directory. * cr - credentials of caller. * ct - caller context * flags - case flags * vsecp - ACL to be set * * OUT: vpp - vnode of created directory. * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated * vp - ctime|mtime|atime updated */ /*ARGSUSED*/ static int zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) { znode_t *zp, *dzp = VTOZ(dvp); zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; uint64_t txtype; dmu_tx_t *tx; int error; ksid_t *ksid; uid_t uid; gid_t gid = crgetgid(cr); zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; ASSERT(vap->va_type == VDIR); /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version */ ksid = crgetsid(cr, KSID_OWNER); if (ksid) uid = ksid_getid(ksid); else uid = crgetuid(cr); if (zfsvfs->z_use_fuids == B_FALSE && ((vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (dzp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } if (zfsvfs->z_utf8 && u8_validate(dirname, strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (vap->va_mask & AT_XVATTR) { if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, crgetuid(cr), cr, vap->va_type)) != 0) { ZFS_EXIT(zfsvfs); return (error); } } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)) != 0) { ZFS_EXIT(zfsvfs); return (error); } /* * First make sure the new directory doesn't exist. * * Existence is checked first to make sure we don't return * EACCES instead of EEXIST which can cause some applications * to fail. */ *vpp = NULL; if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (error); } ASSERT3P(zp, ==, NULL); if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (error); } if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (SET_ERROR(EDQUOT)); } /* * Add a new entry to the directory. */ getnewvnode_reserve(1); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); getnewvnode_drop_reserve(); ZFS_EXIT(zfsvfs); return (error); } /* * Create new node. 
*/ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); /* * Now put new name in parent dir. */ (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); *vpp = ZTOV(zp); txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); getnewvnode_drop_reserve(); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (0); } /* * Remove a directory subdir entry. If the current working * directory is the same as the subdir to be removed, the * remove will fail. * * IN: dvp - vnode of directory to remove from. * name - name of directory to be removed. * cwd - vnode of current working directory. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated */ /*ARGSUSED*/ static int zfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) { znode_t *dzp = VTOZ(dvp); znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; dmu_tx_t *tx; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); ZFS_VERIFY_ZP(zp); zilog = zfsvfs->z_log; if (error = zfs_zaccess_delete(dzp, zp, cr)) { goto out; } if (vp->v_type != VDIR) { error = SET_ERROR(ENOTDIR); goto out; } vnevent_rmdir(vp, dvp, name, ct); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } cache_purge(dvp); error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); if (error == 0) { uint64_t txtype = TX_RMDIR; zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); } dmu_tx_commit(tx); cache_purge(vp); out: if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Read as many directory entries as will fit into the provided * buffer from the given directory cursor position (specified in * the uio structure). * * IN: vp - vnode of directory to read. * uio - structure supplying read location, range info, * and return buffer. * cr - credentials of caller. * ct - caller context * flags - case flags * * OUT: uio - updated offset and range, buffer filled. * eofp - set to true if end-of-file detected. * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated * * Note that the low 4 bits of the cookie returned by zap is always zero. * This allows us to use the low range for "special" directory entries: * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, * we use the offset 2 for the '.zfs' directory. 
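 *
 * Illustrative layout of the offset space (see the cursor setup below):
 *
 *	offset 0	"."
 *	offset 1	".."
 *	offset 2	".zfs"	(only when zfs_show_ctldir() is true)
 *	offset > 3	serialized zap cursor positions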
*/ /* ARGSUSED */ static int zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) { znode_t *zp = VTOZ(vp); iovec_t *iovp; edirent_t *eodp; dirent64_t *odp; zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os; caddr_t outbuf; size_t bufsize; zap_cursor_t zc; zap_attribute_t zap; uint_t bytes_wanted; uint64_t offset; /* must be unsigned; checks for < 1 */ uint64_t parent; int local_eof; int outcount; int error; uint8_t prefetch; boolean_t check_sysattrs; uint8_t type; int ncooks; u_long *cooks = NULL; int flags = 0; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) { ZFS_EXIT(zfsvfs); return (error); } /* * If we are not given an eof variable, * use a local one. */ if (eofp == NULL) eofp = &local_eof; /* * Check for valid iov_len. */ if (uio->uio_iov->iov_len <= 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * Quit if directory has been removed (posix) */ if ((*eofp = zp->z_unlinked) != 0) { ZFS_EXIT(zfsvfs); return (0); } error = 0; os = zfsvfs->z_os; offset = uio->uio_loffset; prefetch = zp->z_zn_prefetch; /* * Initialize the iterator cursor. */ if (offset <= 3) { /* * Start iteration from the beginning of the directory. */ zap_cursor_init(&zc, os, zp->z_id); } else { /* * The offset is a serialized cursor. */ zap_cursor_init_serialized(&zc, os, zp->z_id, offset); } /* * Get space to change directory entries into fs independent format. */ iovp = uio->uio_iov; bytes_wanted = iovp->iov_len; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { bufsize = bytes_wanted; outbuf = kmem_alloc(bufsize, KM_SLEEP); odp = (struct dirent64 *)outbuf; } else { bufsize = bytes_wanted; outbuf = NULL; odp = (struct dirent64 *)iovp->iov_base; } eodp = (struct edirent *)odp; if (ncookies != NULL) { /* * Minimum entry size is dirent size and 1 byte for a file name. */ ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); *cookies = cooks; *ncookies = ncooks; } /* * If this VFS supports the system attribute view interface; and * we're looking at an extended attribute directory; and we care * about normalization conflicts on this vfs; then we must check * for normalization conflicts with the sysattr name space. */ #ifdef TODO check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && (flags & V_RDDIR_ENTFLAGS); #else check_sysattrs = 0; #endif /* * Transform to file-system independent format */ outcount = 0; while (outcount < bytes_wanted) { ino64_t objnum; ushort_t reclen; off64_t *next = NULL; /* * Special case `.', `..', and `.zfs'. */ if (offset == 0) { (void) strcpy(zap.za_name, "."); zap.za_normalization_conflict = 0; objnum = zp->z_id; type = DT_DIR; } else if (offset == 1) { (void) strcpy(zap.za_name, ".."); zap.za_normalization_conflict = 0; objnum = parent; type = DT_DIR; } else if (offset == 2 && zfs_show_ctldir(zp)) { (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); zap.za_normalization_conflict = 0; objnum = ZFSCTL_INO_ROOT; type = DT_DIR; } else { /* * Grab next entry. 
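 * The 64-bit ZAP value fetched here packs the entry's object number
 * in its low bits and its d_type in its uppermost bits (assumed
 * layout), which is why both ZFS_DIRENT_OBJ() and ZFS_DIRENT_TYPE()
 * below are derived from the same zap.za_first_integer word.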
*/ if (error = zap_cursor_retrieve(&zc, &zap)) { if ((*eofp = (error == ENOENT)) != 0) break; else goto update; } if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { cmn_err(CE_WARN, "zap_readdir: bad directory " "entry, obj = %lld, offset = %lld\n", (u_longlong_t)zp->z_id, (u_longlong_t)offset); error = SET_ERROR(ENXIO); goto update; } objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); /* * MacOS X can extract the object type here such as: * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); */ type = ZFS_DIRENT_TYPE(zap.za_first_integer); if (check_sysattrs && !zap.za_normalization_conflict) { #ifdef TODO zap.za_normalization_conflict = xattr_sysattr_casechk(zap.za_name); #else panic("%s:%u: TODO", __func__, __LINE__); #endif } } if (flags & V_RDDIR_ACCFILTER) { /* * If we have no access at all, don't include * this entry in the returned information */ znode_t *ezp; if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) goto skip_entry; if (!zfs_has_access(ezp, cr)) { vrele(ZTOV(ezp)); goto skip_entry; } vrele(ZTOV(ezp)); } if (flags & V_RDDIR_ENTFLAGS) reclen = EDIRENT_RECLEN(strlen(zap.za_name)); else reclen = DIRENT64_RECLEN(strlen(zap.za_name)); /* * Will this entry fit in the buffer? */ if (outcount + reclen > bufsize) { /* * Did we manage to fit anything in the buffer? */ if (!outcount) { error = SET_ERROR(EINVAL); goto update; } break; } if (flags & V_RDDIR_ENTFLAGS) { /* * Add extended flag entry: */ eodp->ed_ino = objnum; eodp->ed_reclen = reclen; /* NOTE: ed_off is the offset for the *next* entry. */ next = &eodp->ed_off; eodp->ed_eflags = zap.za_normalization_conflict ? ED_CASE_CONFLICT : 0; (void) strncpy(eodp->ed_name, zap.za_name, EDIRENT_NAMELEN(reclen)); eodp = (edirent_t *)((intptr_t)eodp + reclen); } else { /* * Add normal entry: */ odp->d_ino = objnum; odp->d_reclen = reclen; odp->d_namlen = strlen(zap.za_name); /* NOTE: d_off is the offset for the *next* entry. */ next = &odp->d_off; (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); odp->d_type = type; dirent_terminate(odp); odp = (dirent64_t *)((intptr_t)odp + reclen); } outcount += reclen; ASSERT(outcount <= bufsize); /* Prefetch znode */ if (prefetch) dmu_prefetch(os, objnum, 0, 0, 0, ZIO_PRIORITY_SYNC_READ); skip_entry: /* * Move to the next entry, fill in the previous offset. */ if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { zap_cursor_advance(&zc); offset = zap_cursor_serialize(&zc); } else { offset += 1; } /* Fill the offset right after advancing the cursor. */ if (next != NULL) *next = offset; if (cooks != NULL) { *cooks++ = offset; ncooks--; KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); } } zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ /* Subtract unused cookies */ if (ncookies != NULL) *ncookies -= ncooks; if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { iovp->iov_base += outcount; iovp->iov_len -= outcount; uio->uio_resid -= outcount; } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { /* * Reset the pointer. 
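 * That is, if the copyout failed, report the offset the caller passed
 * in so that no directory entries appear to have been consumed.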
*/ offset = uio->uio_loffset; } update: zap_cursor_fini(&zc); if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) kmem_free(outbuf, bufsize); if (error == ENOENT) error = 0; ZFS_ACCESSTIME_STAMP(zfsvfs, zp); uio->uio_loffset = offset; ZFS_EXIT(zfsvfs); if (error != 0 && cookies != NULL) { free(*cookies, M_TEMP); *cookies = NULL; *ncookies = 0; } return (error); } ulong_t zfs_fsync_sync_cnt = 4; static int zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); zil_commit(zfsvfs->z_log, zp->z_id); ZFS_EXIT(zfsvfs); } return (0); } /* * Get the requested file attributes and place them in the provided * vattr structure. * * IN: vp - vnode of file. * vap - va_mask identifies requested attributes. * If AT_XVATTR set, then optional attrs are requested * flags - ATTR_NOACLCHECK (CIFS server context) * cr - credentials of caller. * ct - caller context * * OUT: vap - attribute values. * * RETURN: 0 (always succeeds). */ /* ARGSUSED */ static int zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error = 0; uint32_t blksize; u_longlong_t nblocks; uint64_t mtime[2], ctime[2], crtime[2], rdev; xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ xoptattr_t *xoap = NULL; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; sa_bulk_attr_t bulk[4]; int count = 0; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); if (vp->v_type == VBLK || vp->v_type == VCHR) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8); if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { ZFS_EXIT(zfsvfs); return (error); } /* * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. * Also, if we are the owner don't bother, since owner should * always be allowed to read basic attributes of file. */ if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && (vap->va_uid != crgetuid(cr))) { if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, skipaclchk, cr)) { ZFS_EXIT(zfsvfs); return (error); } } /* * Return all attributes. It's cheaper to provide the answer * than to determine whether we were asked the question. */ vap->va_type = IFTOVT(zp->z_mode); vap->va_mode = zp->z_mode & ~S_IFMT; #ifdef illumos vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; #else vn_fsid(vp, vap); #endif vap->va_nodeid = zp->z_id; vap->va_nlink = zp->z_links; if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) && zp->z_links < ZFS_LINK_MAX) vap->va_nlink++; vap->va_size = zp->z_size; #ifdef illumos vap->va_rdev = vp->v_rdev; #else if (vp->v_type == VBLK || vp->v_type == VCHR) vap->va_rdev = zfs_cmpldev(rdev); #endif vap->va_seq = zp->z_seq; vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ vap->va_filerev = zp->z_seq; /* * Add in any requested optional attributes and the create time. * Also set the corresponding bits in the returned attribute bitmap. 
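 *
 * Each optional attribute follows the same request/return handshake;
 * a minimal sketch, using a hypothetical XAT_FOO attribute backed by
 * a hypothetical ZFS_FOO flag bit (not real names):
 *
 *	if (XVA_ISSET_REQ(xvap, XAT_FOO)) {
 *		xoap->xoa_foo = ((zp->z_pflags & ZFS_FOO) != 0);
 *		XVA_SET_RTN(xvap, XAT_FOO);
 *	}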
*/ if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { xoap->xoa_archive = ((zp->z_pflags & ZFS_ARCHIVE) != 0); XVA_SET_RTN(xvap, XAT_ARCHIVE); } if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { xoap->xoa_readonly = ((zp->z_pflags & ZFS_READONLY) != 0); XVA_SET_RTN(xvap, XAT_READONLY); } if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { xoap->xoa_system = ((zp->z_pflags & ZFS_SYSTEM) != 0); XVA_SET_RTN(xvap, XAT_SYSTEM); } if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { xoap->xoa_hidden = ((zp->z_pflags & ZFS_HIDDEN) != 0); XVA_SET_RTN(xvap, XAT_HIDDEN); } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { xoap->xoa_nounlink = ((zp->z_pflags & ZFS_NOUNLINK) != 0); XVA_SET_RTN(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { xoap->xoa_immutable = ((zp->z_pflags & ZFS_IMMUTABLE) != 0); XVA_SET_RTN(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { xoap->xoa_appendonly = ((zp->z_pflags & ZFS_APPENDONLY) != 0); XVA_SET_RTN(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { xoap->xoa_nodump = ((zp->z_pflags & ZFS_NODUMP) != 0); XVA_SET_RTN(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { xoap->xoa_opaque = ((zp->z_pflags & ZFS_OPAQUE) != 0); XVA_SET_RTN(xvap, XAT_OPAQUE); } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { xoap->xoa_av_quarantined = ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { xoap->xoa_av_modified = ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); XVA_SET_RTN(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && vp->v_type == VREG) { zfs_sa_get_scanstamp(zp, xvap); } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); XVA_SET_RTN(xvap, XAT_REPARSE); } if (XVA_ISSET_REQ(xvap, XAT_GEN)) { xoap->xoa_generation = zp->z_gen; XVA_SET_RTN(xvap, XAT_GEN); } if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { xoap->xoa_offline = ((zp->z_pflags & ZFS_OFFLINE) != 0); XVA_SET_RTN(xvap, XAT_OFFLINE); } if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { xoap->xoa_sparse = ((zp->z_pflags & ZFS_SPARSE) != 0); XVA_SET_RTN(xvap, XAT_SPARSE); } } ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); ZFS_TIME_DECODE(&vap->va_mtime, mtime); ZFS_TIME_DECODE(&vap->va_ctime, ctime); ZFS_TIME_DECODE(&vap->va_birthtime, crtime); sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); vap->va_blksize = blksize; vap->va_bytes = nblocks << 9; /* nblocks * 512 */ if (zp->z_blksz == 0) { /* * Block size hasn't been set; suggest maximal I/O transfers. */ vap->va_blksize = zfsvfs->z_max_blksz; } ZFS_EXIT(zfsvfs); return (0); } /* * Set the file attributes to the values contained in the * vattr structure. * * IN: vp - vnode of file to be modified. * vap - new attribute values. * If AT_XVATTR set, then optional attrs are being set * flags - ATTR_UTIME set if non-default time values provided. * - ATTR_NOACLCHECK (CIFS context only). * cr - credentials of caller. * ct - caller context * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - ctime updated, mtime updated if size changed. 
*/ /* ARGSUSED */ static int zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog; dmu_tx_t *tx; vattr_t oldva; xvattr_t tmpxvattr; uint_t mask = vap->va_mask; uint_t saved_mask = 0; uint64_t saved_mode; int trim_mask = 0; uint64_t new_mode; uint64_t new_uid, new_gid; uint64_t xattr_obj; uint64_t mtime[2], ctime[2]; znode_t *attrzp; int need_policy = FALSE; int err, err2; zfs_fuid_info_t *fuidp = NULL; xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ xoptattr_t *xoap; zfs_acl_t *aclp; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; boolean_t fuid_dirtied = B_FALSE; sa_bulk_attr_t bulk[7], xattr_bulk[7]; int count = 0, xattr_count = 0; if (mask == 0) return (0); if (mask & AT_NOSET) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); zilog = zfsvfs->z_log; /* * Make sure that if we have ephemeral uid/gid or xvattr specified * that file system is at proper version level */ if (zfsvfs->z_use_fuids == B_FALSE && (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || (mask & AT_XVATTR))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } if (mask & AT_SIZE && vp->v_type == VDIR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EISDIR)); } if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } /* * If this is an xvattr_t, then get a pointer to the structure of * optional attributes. If this is NULL, then we have a vattr_t. */ xoap = xva_getxoptattr(xvap); xva_init(&tmpxvattr); /* * Immutable files can only alter immutable bit and atime */ if ((zp->z_pflags & ZFS_IMMUTABLE) && ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } /* * Note: ZFS_READONLY is handled in zfs_zaccess_common. */ /* * Verify timestamps doesn't overflow 32 bits. * ZFS can handle large timestamps, but 32bit syscalls can't * handle times greater than 2039. This check should be removed * once large timestamps are fully supported. */ if (mask & (AT_ATIME | AT_MTIME)) { if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EOVERFLOW)); } } if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && TIMESPEC_OVERFLOW(&vap->va_birthtime)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EOVERFLOW)); } attrzp = NULL; aclp = NULL; /* Can this be moved to before the top label? */ if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EROFS)); } /* * First validate permissions */ if (mask & AT_SIZE) { /* * XXX - Note, we are not providing any open * mode flags here (like FNDELAY), so we may * block if there are locks present... this * should be addressed in openat(). */ /* XXX - would it be OK to generate a log record here? 
*/ err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); if (err) { ZFS_EXIT(zfsvfs); return (err); } } if (mask & (AT_ATIME|AT_MTIME) || ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || XVA_ISSET_REQ(xvap, XAT_READONLY) || XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || XVA_ISSET_REQ(xvap, XAT_OFFLINE) || XVA_ISSET_REQ(xvap, XAT_SPARSE) || XVA_ISSET_REQ(xvap, XAT_CREATETIME) || XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, skipaclchk, cr); } if (mask & (AT_UID|AT_GID)) { int idmask = (mask & (AT_UID|AT_GID)); int take_owner; int take_group; /* * NOTE: even if a new mode is being set, * we may clear S_ISUID/S_ISGID bits. */ if (!(mask & AT_MODE)) vap->va_mode = zp->z_mode; /* * Take ownership or chgrp to group we are a member of */ take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); take_group = (mask & AT_GID) && zfs_groupmember(zfsvfs, vap->va_gid, cr); /* * If both AT_UID and AT_GID are set then take_owner and * take_group must both be set in order to allow taking * ownership. * * Otherwise, send the check through secpolicy_vnode_setattr() * */ if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || ((idmask == AT_UID) && take_owner) || ((idmask == AT_GID) && take_group)) { if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, skipaclchk, cr) == 0) { /* * Remove setuid/setgid for non-privileged users */ secpolicy_setid_clear(vap, vp, cr); trim_mask = (mask & (AT_UID|AT_GID)); } else { need_policy = TRUE; } } else { need_policy = TRUE; } } oldva.va_mode = zp->z_mode; zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); if (mask & AT_XVATTR) { /* * Update xvattr mask to include only those attributes * that are actually changing. * * the bits will be restored prior to actually setting * the attributes so the caller thinks they were set. 
*/ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { if (xoap->xoa_appendonly != ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_APPENDONLY); XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); } } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { if (xoap->xoa_nounlink != ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_NOUNLINK); XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); } } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { if (xoap->xoa_immutable != ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_IMMUTABLE); XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); } } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { if (xoap->xoa_nodump != ((zp->z_pflags & ZFS_NODUMP) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_NODUMP); XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); } } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { if (xoap->xoa_av_modified != ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); } } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { if ((vp->v_type != VREG && xoap->xoa_av_quarantined) || xoap->xoa_av_quarantined != ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); } } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (need_policy == FALSE && (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { need_policy = TRUE; } } if (mask & AT_MODE) { if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { err = secpolicy_setid_setsticky_clear(vp, vap, &oldva, cr); if (err) { ZFS_EXIT(zfsvfs); return (err); } trim_mask |= AT_MODE; } else { need_policy = TRUE; } } if (need_policy) { /* * If trim_mask is set then take ownership * has been granted or write_acl is present and user * has the ability to modify mode. In that case remove * UID|GID and or MODE from mask so that * secpolicy_vnode_setattr() doesn't revoke it. */ if (trim_mask) { saved_mask = vap->va_mask; vap->va_mask &= ~trim_mask; if (trim_mask & AT_MODE) { /* * Save the mode, as secpolicy_vnode_setattr() * will overwrite it with ova.va_mode. */ saved_mode = vap->va_mode; } } err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); if (err) { ZFS_EXIT(zfsvfs); return (err); } if (trim_mask) { vap->va_mask |= saved_mask; if (trim_mask & AT_MODE) { /* * Recover the mode after * secpolicy_vnode_setattr(). 
*/ vap->va_mode = saved_mode; } } } /* * secpolicy_vnode_setattr, or take ownership may have * changed va_mask */ mask = vap->va_mask; if ((mask & (AT_UID | AT_GID))) { err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); if (err == 0 && xattr_obj) { err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); if (err == 0) { err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); if (err != 0) vrele(ZTOV(attrzp)); } if (err) goto out2; } if (mask & AT_UID) { new_uid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); if (new_uid != zp->z_uid && zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { if (attrzp) vput(ZTOV(attrzp)); err = SET_ERROR(EDQUOT); goto out2; } } if (mask & AT_GID) { new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp); if (new_gid != zp->z_gid && zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { if (attrzp) vput(ZTOV(attrzp)); err = SET_ERROR(EDQUOT); goto out2; } } } tx = dmu_tx_create(zfsvfs->z_os); if (mask & AT_MODE) { uint64_t pmode = zp->z_mode; uint64_t acl_obj; new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { err = SET_ERROR(EPERM); goto out; } if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) goto out; if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { /* * Are we upgrading ACL from old V0 format * to V1 format? */ if (zfsvfs->z_version >= ZPL_VERSION_FUID && zfs_znode_acl_version(zp) == ZFS_ACL_VERSION_INITIAL) { dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } else { dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); } } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); } else { if ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); else dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); } if (attrzp) { dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); } fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); zfs_sa_upgrade_txholds(tx, zp); err = dmu_tx_assign(tx, TXG_WAIT); if (err) goto out; count = 0; /* * Set each attribute requested. * We group settings according to the locks they need to acquire. * * Note: you cannot set ctime directly, although it will be * updated as a side-effect of calling this function. 
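 *
 * The attribute values gathered below are only queued with
 * SA_ADD_BULK_ATTR() against the in-memory state; the bulk arrays are
 * written out by the sa_bulk_update() calls near the end of this
 * function, within the same assigned tx.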
*/ if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_enter(&zp->z_acl_lock); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); if (attrzp) { if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_enter(&attrzp->z_acl_lock); SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, sizeof (attrzp->z_pflags)); } if (mask & (AT_UID|AT_GID)) { if (mask & AT_UID) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &new_uid, sizeof (new_uid)); zp->z_uid = new_uid; if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_UID(zfsvfs), NULL, &new_uid, sizeof (new_uid)); attrzp->z_uid = new_uid; } } if (mask & AT_GID) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &new_gid, sizeof (new_gid)); zp->z_gid = new_gid; if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_GID(zfsvfs), NULL, &new_gid, sizeof (new_gid)); attrzp->z_gid = new_gid; } } if (!(mask & AT_MODE)) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &new_mode, sizeof (new_mode)); new_mode = zp->z_mode; } err = zfs_acl_chown_setattr(zp); ASSERT(err == 0); if (attrzp) { err = zfs_acl_chown_setattr(attrzp); ASSERT(err == 0); } } if (mask & AT_MODE) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &new_mode, sizeof (new_mode)); zp->z_mode = new_mode; ASSERT3U((uintptr_t)aclp, !=, 0); err = zfs_aclset_common(zp, aclp, cr, tx); ASSERT0(err); if (zp->z_acl_cached) zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = aclp; aclp = NULL; } if (mask & AT_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &zp->z_atime, sizeof (zp->z_atime)); } if (mask & AT_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, sizeof (mtime)); } /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ if (mask & AT_SIZE && !(mask & AT_MTIME)) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, sizeof (mtime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); } else if (mask != 0) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, B_TRUE); if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(attrzp, STATE_CHANGED, mtime, ctime, B_TRUE); } } /* * Do this after setting timestamps to prevent timestamp * update from toggling bit */ if (xoap && (mask & AT_XVATTR)) { if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) xoap->xoa_createtime = vap->va_birthtime; /* * restore trimmed off masks * so that return masks can be set for caller. 
*/ if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { XVA_SET_REQ(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { XVA_SET_REQ(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { XVA_SET_REQ(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { XVA_SET_REQ(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { XVA_SET_REQ(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ASSERT(vp->v_type == VREG); zfs_xvattr_set(zp, xvap, tx); } if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); if (mask != 0) zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_exit(&zp->z_acl_lock); if (attrzp) { if (mask & (AT_UID|AT_GID|AT_MODE)) mutex_exit(&attrzp->z_acl_lock); } out: if (err == 0 && attrzp) { err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, xattr_count, tx); ASSERT(err2 == 0); } if (attrzp) vput(ZTOV(attrzp)); if (aclp) zfs_acl_free(aclp); if (fuidp) { zfs_fuid_info_free(fuidp); fuidp = NULL; } if (err) { dmu_tx_abort(tx); } else { err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); dmu_tx_commit(tx); } out2: if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (err); } /* * We acquire all but fdvp locks using non-blocking acquisitions. If we * fail to acquire any lock in the path we will drop all held locks, * acquire the new lock in a blocking fashion, and then release it and * restart the rename. This acquire/release step ensures that we do not * spin on a lock waiting for release. On error release all vnode locks * and decrement references the way tmpfs_rename() would do. */ static int zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, struct vnode *tdvp, struct vnode **tvpp, const struct componentname *scnp, const struct componentname *tcnp) { zfsvfs_t *zfsvfs; struct vnode *nvp, *svp, *tvp; znode_t *sdzp, *tdzp, *szp, *tzp; const char *snm = scnp->cn_nameptr; const char *tnm = tcnp->cn_nameptr; int error; VOP_UNLOCK(tdvp, 0); if (*tvpp != NULL && *tvpp != tdvp) VOP_UNLOCK(*tvpp, 0); relock: error = vn_lock(sdvp, LK_EXCLUSIVE); if (error) goto out; sdzp = VTOZ(sdvp); error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); if (error != 0) { VOP_UNLOCK(sdvp, 0); if (error != EBUSY) goto out; error = vn_lock(tdvp, LK_EXCLUSIVE); if (error) goto out; VOP_UNLOCK(tdvp, 0); goto relock; } tdzp = VTOZ(tdvp); /* * Before using sdzp and tdzp we must ensure that they are live. * As a porting legacy from illumos we have two things to worry * about. One is typical for FreeBSD and it is that the vnode is * not reclaimed (doomed). The other is that the znode is live. * The current code can invalidate the znode without acquiring the * corresponding vnode lock if the object represented by the znode * and vnode is no longer valid after a rollback or receive operation. * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock * that protects the znodes from the invalidation. */ zfsvfs = sdzp->z_zfsvfs; ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); ZFS_ENTER(zfsvfs); /* * We can not use ZFS_VERIFY_ZP() here because it could directly return * bypassing the cleanup code in the case of an error. */ if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { ZFS_EXIT(zfsvfs); VOP_UNLOCK(sdvp, 0); VOP_UNLOCK(tdvp, 0); error = SET_ERROR(EIO); goto out; } /* * Re-resolve svp to be certain it still exists and fetch the * correct vnode. 
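 *
 * A minimal sketch of the try-lock and back-off pattern this function
 * applies to each vnode (pseudo-code, condensed from the surrounding
 * logic):
 *
 *	error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
 *	if (error == EBUSY) {
 *		drop the locks currently held;
 *		vn_lock(vp, LK_EXCLUSIVE);	wait for it once
 *		VOP_UNLOCK(vp, 0);		then release it
 *		goto relock;			and start over
 *	}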
*/ error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); if (error != 0) { /* Source entry invalid or not there. */ ZFS_EXIT(zfsvfs); VOP_UNLOCK(sdvp, 0); VOP_UNLOCK(tdvp, 0); if ((scnp->cn_flags & ISDOTDOT) != 0 || (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) error = SET_ERROR(EINVAL); goto out; } svp = ZTOV(szp); /* * Re-resolve tvp, if it disappeared we just carry on. */ error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); if (error != 0) { ZFS_EXIT(zfsvfs); VOP_UNLOCK(sdvp, 0); VOP_UNLOCK(tdvp, 0); vrele(svp); if ((tcnp->cn_flags & ISDOTDOT) != 0) error = SET_ERROR(EINVAL); goto out; } if (tzp != NULL) tvp = ZTOV(tzp); else tvp = NULL; /* * At present the vnode locks must be acquired before z_teardown_lock, * although it would be more logical to use the opposite order. */ ZFS_EXIT(zfsvfs); /* * Now try acquire locks on svp and tvp. */ nvp = svp; error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); if (error != 0) { VOP_UNLOCK(sdvp, 0); VOP_UNLOCK(tdvp, 0); if (tvp != NULL) vrele(tvp); if (error != EBUSY) { vrele(nvp); goto out; } error = vn_lock(nvp, LK_EXCLUSIVE); if (error != 0) { vrele(nvp); goto out; } VOP_UNLOCK(nvp, 0); /* * Concurrent rename race. * XXX ? */ if (nvp == tdvp) { vrele(nvp); error = SET_ERROR(EINVAL); goto out; } vrele(*svpp); *svpp = nvp; goto relock; } vrele(*svpp); *svpp = nvp; if (*tvpp != NULL) vrele(*tvpp); *tvpp = NULL; if (tvp != NULL) { nvp = tvp; error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); if (error != 0) { VOP_UNLOCK(sdvp, 0); VOP_UNLOCK(tdvp, 0); VOP_UNLOCK(*svpp, 0); if (error != EBUSY) { vrele(nvp); goto out; } error = vn_lock(nvp, LK_EXCLUSIVE); if (error != 0) { vrele(nvp); goto out; } vput(nvp); goto relock; } *tvpp = nvp; } return (0); out: return (error); } /* * Note that we must use VRELE_ASYNC in this function as it walks * up the directory tree and vrele may need to acquire an exclusive * lock if a last reference to a vnode is dropped. */ static int zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) { zfsvfs_t *zfsvfs; znode_t *zp, *zp1; uint64_t parent; int error; zfsvfs = tdzp->z_zfsvfs; if (tdzp == szp) return (SET_ERROR(EINVAL)); if (tdzp == sdzp) return (0); if (tdzp->z_id == zfsvfs->z_root) return (0); zp = tdzp; for (;;) { ASSERT(!zp->z_unlinked); if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) break; if (parent == szp->z_id) { error = SET_ERROR(EINVAL); break; } if (parent == zfsvfs->z_root) break; if (parent == sdzp->z_id) break; error = zfs_zget(zfsvfs, parent, &zp1); if (error != 0) break; if (zp != tdzp) VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); zp = zp1; } if (error == ENOTDIR) panic("checkpath: .. not a directory\n"); if (zp != tdzp) VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); return (error); } /* * Move an entry from the provided source directory to the target * directory. Change the entry name as indicated. * * IN: sdvp - Source directory containing the "old entry". * snm - Old entry name. * tdvp - Target directory to contain the "new entry". * tnm - New entry name. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. 
* * Timestamps: * sdvp,tdvp - ctime|mtime updated */ /*ARGSUSED*/ static int zfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, cred_t *cr) { zfsvfs_t *zfsvfs; znode_t *sdzp, *tdzp, *szp, *tzp; zilog_t *zilog = NULL; dmu_tx_t *tx; char *snm = scnp->cn_nameptr; char *tnm = tcnp->cn_nameptr; int error = 0; /* Reject renames across filesystems. */ if ((*svpp)->v_mount != tdvp->v_mount || ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { error = SET_ERROR(EXDEV); goto out; } if (zfsctl_is_node(tdvp)) { error = SET_ERROR(EXDEV); goto out; } /* * Lock all four vnodes to ensure safety and semantics of renaming. */ error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); if (error != 0) { /* no vnodes are locked in the case of error here */ return (error); } tdzp = VTOZ(tdvp); sdzp = VTOZ(sdvp); zfsvfs = tdzp->z_zfsvfs; zilog = zfsvfs->z_log; /* * After we re-enter ZFS_ENTER() we will have to revalidate all * znodes involved. */ ZFS_ENTER(zfsvfs); if (zfsvfs->z_utf8 && u8_validate(tnm, strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { error = SET_ERROR(EILSEQ); goto unlockout; } /* If source and target are the same file, there is nothing to do. */ if ((*svpp) == (*tvpp)) { error = 0; goto unlockout; } if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && (*tvpp)->v_mountedhere != NULL)) { error = SET_ERROR(EXDEV); goto unlockout; } /* * We can not use ZFS_VERIFY_ZP() here because it could directly return * bypassing the cleanup code in the case of an error. */ if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { error = SET_ERROR(EIO); goto unlockout; } szp = VTOZ(*svpp); tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { error = SET_ERROR(EIO); goto unlockout; } /* * This is to prevent the creation of links into attribute space * by renaming a linked file into/outof an attribute directory. * See the comment in zfs_link() for why this is considered bad. */ if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { error = SET_ERROR(EINVAL); goto unlockout; } /* * Must have write access at the source to remove the old entry * and write access at the target to create the new entry. * Note that if target and source are the same, this can be * done in a single check. */ if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) goto unlockout; if ((*svpp)->v_type == VDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || sdzp == szp || (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; goto unlockout; } /* * Check to make sure rename is valid. * Can't do a move like this: /usr/a/b to /usr/a/b/c/d */ if (error = zfs_rename_check(szp, sdzp, tdzp)) goto unlockout; } /* * Does target exist? */ if (tzp) { /* * Source and target must be the same type. */ if ((*svpp)->v_type == VDIR) { if ((*tvpp)->v_type != VDIR) { error = SET_ERROR(ENOTDIR); goto unlockout; } else { cache_purge(tdvp); if (sdvp != tdvp) cache_purge(sdvp); } } else { if ((*tvpp)->v_type == VDIR) { error = SET_ERROR(EISDIR); goto unlockout; } } } vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); if (tzp) vnevent_rename_dest(*tvpp, tdvp, tnm, ct); /* * notify the target directory if it is not the same * as source directory. 
*/ if (tdvp != sdvp) { vnevent_rename_dest_dir(tdvp, ct); } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); if (sdzp != tdzp) { dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tdzp); } if (tzp) { dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tzp); } zfs_sa_upgrade_txholds(tx, szp); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); goto unlockout; } if (tzp) /* Attempt to remove the existing target */ error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); if (error == 0) { error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); if (error == 0) { szp->z_pflags |= ZFS_AV_MODIFIED; error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), (void *)&szp->z_pflags, sizeof (uint64_t), tx); ASSERT0(error); error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, NULL); if (error == 0) { zfs_log_rename(zilog, tx, TX_RENAME, sdzp, snm, tdzp, tnm, szp); /* * Update path information for the target vnode */ vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); } else { /* * At this point, we have successfully created * the target name, but have failed to remove * the source name. Since the create was done * with the ZRENAMING flag, there are * complications; for one, the link count is * wrong. The easiest way to deal with this * is to remove the newly created target, and * return the original error. This must * succeed; fortunately, it is very unlikely to * fail, since we just created it. */ VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, ZRENAMING, NULL), ==, 0); } } if (error == 0) { cache_purge(*svpp); if (*tvpp != NULL) cache_purge(*tvpp); cache_purge_negative(tdvp); } } dmu_tx_commit(tx); unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ ZFS_EXIT(zfsvfs); VOP_UNLOCK(*svpp, 0); VOP_UNLOCK(sdvp, 0); out: /* original two vnodes are locked */ if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); if (*tvpp != NULL) VOP_UNLOCK(*tvpp, 0); if (tdvp != *tvpp) VOP_UNLOCK(tdvp, 0); return (error); } /* * Insert the indicated symbolic reference entry into the directory. * * IN: dvp - Directory to contain new symbolic link. * link - Name for new symlink entry. * vap - Attributes of new entry. * cr - credentials of caller. * ct - caller context * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated */ /*ARGSUSED*/ static int zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, cred_t *cr, kthread_t *td) { znode_t *zp, *dzp = VTOZ(dvp); dmu_tx_t *tx; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; uint64_t len = strlen(link); int error; zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; uint64_t txtype = TX_SYMLINK; int flags = 0; ASSERT(vap->va_type == VLNK); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } if (len > MAXPATHLEN) { ZFS_EXIT(zfsvfs); return (SET_ERROR(ENAMETOOLONG)); } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)) != 0) { ZFS_EXIT(zfsvfs); return (error); } /* * Attempt to lock directory; fail if entry already exists. 
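 * The ZNEW flag in the lookup below asks for a "must not yet exist"
 * check, so the call is expected to fail if an entry of that name is
 * already present, while the ZEXISTS lookups in the remove and rename
 * paths require the entry to be present (assumed semantics of the two
 * flags).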
*/ error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); if (error) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (error); } if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (error); } if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zfsvfs); return (SET_ERROR(EDQUOT)); } getnewvnode_reserve(1); tx = dmu_tx_create(zfsvfs->z_os); fuid_dirtied = zfsvfs->z_fuid_dirty; dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE + len); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); getnewvnode_drop_reserve(); ZFS_EXIT(zfsvfs); return (error); } /* * Create a new object for the symlink. * for version 4 ZPL datsets the symlink will be an SA attribute */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); if (zp->z_is_sa) error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), link, len, tx); else zfs_sa_symlink(zp, link, len, tx); zp->z_size = len; (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), &zp->z_size, sizeof (zp->z_size), tx); /* * Insert the new object into the directory. */ (void) zfs_link_create(dzp, name, zp, tx, ZNEW); zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); *vpp = ZTOV(zp); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); getnewvnode_drop_reserve(); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /* * Return, in the buffer contained in the provided uio structure, * the symbolic path referred to by vp. * * IN: vp - vnode of symbolic link. * uio - structure to contain the link path. * cr - credentials of caller. * ct - caller context * * OUT: uio - structure containing the link path. * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated */ /* ARGSUSED */ static int zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if (zp->z_is_sa) error = sa_lookup_uio(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), uio); else error = zfs_sa_readlink(zp, uio); ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); return (error); } /* * Insert a new entry into directory tdvp referencing svp. * * IN: tdvp - Directory to contain new entry. * svp - vnode of new entry. * name - name of new entry. * cr - credentials of caller. * ct - caller context * * RETURN: 0 on success, error code on failure. * * Timestamps: * tdvp - ctime|mtime updated * svp - ctime updated */ /* ARGSUSED */ static int zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, caller_context_t *ct, int flags) { znode_t *dzp = VTOZ(tdvp); znode_t *tzp, *szp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; dmu_tx_t *tx; int error; uint64_t parent; uid_t owner; ASSERT(tdvp->v_type == VDIR); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; /* * POSIX dictates that we return EPERM here. * Better choices include ENOTSUP or EISDIR. 
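 * In practice this means any attempt to hard-link a directory fails
 * with EPERM, even for privileged callers, as the check below shows.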
*/ if (svp->v_type == VDIR) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } szp = VTOZ(svp); ZFS_VERIFY_ZP(szp); if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } /* Prevent links to .zfs/shares files */ if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, sizeof (uint64_t))) != 0) { ZFS_EXIT(zfsvfs); return (error); } if (parent == zfsvfs->z_shares_dir) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EILSEQ)); } /* * We do not support links between attributes and non-attributes * because of the potential security risk of creating links * into "normal" file space in order to circumvent restrictions * imposed in attribute space. */ if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); } owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); } if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { ZFS_EXIT(zfsvfs); return (error); } /* * Attempt to lock directory; fail if entry already exists. */ error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); if (error) { ZFS_EXIT(zfsvfs); return (error); } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); zfs_sa_upgrade_txholds(tx, szp); zfs_sa_upgrade_txholds(tx, dzp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); ZFS_EXIT(zfsvfs); return (error); } error = zfs_link_create(dzp, name, szp, tx, 0); if (error == 0) { uint64_t txtype = TX_LINK; zfs_log_link(zilog, tx, txtype, dzp, szp, name); } dmu_tx_commit(tx); if (error == 0) { vnevent_link(svp, ct); } if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } /*ARGSUSED*/ void zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); if (zp->z_sa_hdl == NULL) { /* * The fs has been unmounted, or we did a * suspend/resume and this file no longer exists. */ rw_exit(&zfsvfs->z_teardown_inactive_lock); vrecycle(vp); return; } if (zp->z_unlinked) { /* * Fast path to recycle a vnode of a removed file. 
*/ rw_exit(&zfsvfs->z_teardown_inactive_lock); vrecycle(vp); return; } if (zp->z_atime_dirty && zp->z_unlinked == 0) { dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), (void *)&zp->z_atime, sizeof (zp->z_atime), tx); zp->z_atime_dirty = 0; dmu_tx_commit(tx); } } rw_exit(&zfsvfs->z_teardown_inactive_lock); } CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); /*ARGSUSED*/ static int zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint32_t gen; uint64_t gen64; uint64_t object = zp->z_id; zfid_short_t *zfid; int size, i, error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &gen64, sizeof (uint64_t))) != 0) { ZFS_EXIT(zfsvfs); return (error); } gen = (uint32_t)gen64; size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; #ifdef illumos if (fidp->fid_len < size) { fidp->fid_len = size; ZFS_EXIT(zfsvfs); return (SET_ERROR(ENOSPC)); } #else fidp->fid_len = size; #endif zfid = (zfid_short_t *)fidp; zfid->zf_len = size; for (i = 0; i < sizeof (zfid->zf_object); i++) zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); /* Must have a non-zero generation number to distinguish from .zfs */ if (gen == 0) gen = 1; for (i = 0; i < sizeof (zfid->zf_gen); i++) zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); if (size == LONG_FID_LEN) { uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); zfid_long_t *zlfid; zlfid = (zfid_long_t *)fidp; for (i = 0; i < sizeof (zlfid->zf_setid); i++) zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); /* XXX - this should be the generation number for the objset */ for (i = 0; i < sizeof (zlfid->zf_setgen); i++) zlfid->zf_setgen[i] = 0; } ZFS_EXIT(zfsvfs); return (0); } static int zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, caller_context_t *ct) { znode_t *zp, *xzp; zfsvfs_t *zfsvfs; int error; switch (cmd) { case _PC_LINK_MAX: *valp = MIN(LONG_MAX, ZFS_LINK_MAX); return (0); case _PC_FILESIZEBITS: *valp = 64; return (0); #ifdef illumos case _PC_XATTR_EXISTS: zp = VTOZ(vp); zfsvfs = zp->z_zfsvfs; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); *valp = 0; error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR | ZEXISTS | ZSHARED); if (error == 0) { if (!zfs_dirempty(xzp)) *valp = 1; vrele(ZTOV(xzp)); } else if (error == ENOENT) { /* * If there aren't extended attributes, it's the * same as having zero of them. 
*/ error = 0; } ZFS_EXIT(zfsvfs); return (error); case _PC_SATTR_ENABLED: case _PC_SATTR_EXISTS: *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && (vp->v_type == VREG || vp->v_type == VDIR); return (0); case _PC_ACCESS_FILTERING: *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && vp->v_type == VDIR; return (0); case _PC_ACL_ENABLED: *valp = _ACL_ACE_ENABLED; return (0); #endif /* illumos */ case _PC_MIN_HOLE_SIZE: *valp = (int)SPA_MINBLOCKSIZE; return (0); #ifdef illumos case _PC_TIMESTAMP_RESOLUTION: /* nanosecond timestamp resolution */ *valp = 1L; return (0); #endif case _PC_ACL_EXTENDED: *valp = 0; return (0); case _PC_ACL_NFS4: *valp = 1; return (0); case _PC_ACL_PATH_MAX: *valp = ACL_MAX_ENTRIES; return (0); default: return (EOPNOTSUPP); } } /*ARGSUSED*/ static int zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); error = zfs_getacl(zp, vsecp, skipaclchk, cr); ZFS_EXIT(zfsvfs); return (error); } /*ARGSUSED*/ int zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; zilog_t *zilog = zfsvfs->z_log; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); error = zfs_setacl(zp, vsecp, skipaclchk, cr); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); } static int zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, int *rahead) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zp->z_zfsvfs->z_os; rl_t *rl; vm_object_t object; off_t start, end, obj_size; uint_t blksz; int pgsin_b, pgsin_a; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); start = IDX_TO_OFF(ma[0]->pindex); end = IDX_TO_OFF(ma[count - 1]->pindex + 1); /* * Lock a range covering all required and optional pages. * Note that we need to handle the case of the block size growing. */ for (;;) { blksz = zp->z_blksz; rl = zfs_range_lock(zp, rounddown(start, blksz), roundup(end, blksz) - rounddown(start, blksz), RL_READER); if (blksz == zp->z_blksz) break; zfs_range_unlock(rl); } object = ma[0]->object; zfs_vmobject_wlock(object); obj_size = object->un_pager.vnp.vnp_size; zfs_vmobject_wunlock(object); if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); return (zfs_vm_pagerret_bad); } pgsin_b = 0; if (rbehind != NULL) { pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); pgsin_b = MIN(*rbehind, pgsin_b); } pgsin_a = 0; if (rahead != NULL) { pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); if (end + IDX_TO_OFF(pgsin_a) >= obj_size) pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); pgsin_a = MIN(*rahead, pgsin_a); } /* * NB: we need to pass the exact byte size of the data that we expect * to read after accounting for the file size. This is required because * ZFS will panic if we request DMU to read beyond the end of the last * allocated block. 
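 *
 * Worked example (illustrative numbers only): with PAGE_SIZE = 4096,
 * if the last requested page covers bytes [61440, 65536) (so
 * end == 65536) and the file is 63000 bytes long (obj_size == 63000),
 * the call below passes MIN(65536, 63000) - (65536 - 4096) = 1560,
 * i.e. only the bytes of the final page that actually exist in the
 * file.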
*/ error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE)); zfs_range_unlock(rl); ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); if (error != 0) return (zfs_vm_pagerret_error); VM_CNT_INC(v_vnodein); VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a); if (rbehind != NULL) *rbehind = pgsin_b; if (rahead != NULL) *rahead = pgsin_a; return (zfs_vm_pagerret_ok); } static int zfs_freebsd_getpages(ap) struct vop_getpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int *a_rbehind; int *a_rahead; } */ *ap; { return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead)); } static int zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, int *rtvals) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; rl_t *rl; dmu_tx_t *tx; struct sf_buf *sf; vm_object_t object; vm_page_t m; caddr_t va; size_t tocopy; size_t lo_len; vm_ooffset_t lo_off; vm_ooffset_t off; uint_t blksz; int ncount; int pcount; int err; int i; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); object = vp->v_object; pcount = btoc(len); ncount = pcount; KASSERT(ma[0]->object == object, ("mismatching object")); KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); for (i = 0; i < pcount; i++) rtvals[i] = zfs_vm_pagerret_error; off = IDX_TO_OFF(ma[0]->pindex); blksz = zp->z_blksz; lo_off = rounddown(off, blksz); lo_len = roundup(len + (off - lo_off), blksz); rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); zfs_vmobject_wlock(object); if (len + off > object->un_pager.vnp.vnp_size) { if (object->un_pager.vnp.vnp_size > off) { int pgoff; len = object->un_pager.vnp.vnp_size - off; ncount = btoc(len); if ((pgoff = (int)len & PAGE_MASK) != 0) { /* * If the object is locked and the following * conditions hold, then the page's dirty * field cannot be concurrently changed by a * pmap operation. */ m = ma[ncount - 1]; vm_page_assert_sbusied(m); KASSERT(!pmap_page_is_write_mapped(m), ("zfs_putpages: page %p is not read-only", m)); vm_page_clear_dirty(m, pgoff, PAGE_SIZE - pgoff); } } else { len = 0; ncount = 0; } if (ncount < pcount) { for (i = ncount; i < pcount; i++) { rtvals[i] = zfs_vm_pagerret_bad; } } } zfs_vmobject_wunlock(object); if (ncount == 0) goto out; if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { goto out; } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_write(tx, zp->z_id, off, len); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { dmu_tx_abort(tx); goto out; } if (zp->z_blksz < PAGE_SIZE) { for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; va = zfs_map_page(ma[i], &sf); dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); zfs_unmap_page(sf); } } else { err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); } if (err == 0) { uint64_t mtime[2], ctime[2]; sa_bulk_attr_t bulk[3]; int count = 0; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ASSERT0(err); zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); zfs_vmobject_wlock(object); for (i = 0; i < ncount; i++) { rtvals[i] = zfs_vm_pagerret_ok; vm_page_undirty(ma[i]); } zfs_vmobject_wunlock(object); VM_CNT_INC(v_vnodeout); VM_CNT_ADD(v_vnodepgsout, ncount); } dmu_tx_commit(tx); out: zfs_range_unlock(rl); if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zfsvfs->z_log, zp->z_id); ZFS_EXIT(zfsvfs); return (rtvals[0]); } int zfs_freebsd_putpages(ap) struct vop_putpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_sync; int *a_rtvals; } */ *ap; { return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals)); } static int zfs_freebsd_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_bop != NULL) *ap->a_bop = &ap->a_vp->v_bufobj; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } static int zfs_freebsd_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { vnode_t *vp = ap->a_vp; znode_t *zp = VTOZ(vp); int error; error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); if (error == 0) vnode_create_vobject(vp, zp->z_size, ap->a_td); return (error); } static int zfs_freebsd_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); } static int zfs_freebsd_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; u_long a_command; caddr_t a_data; int a_fflag; struct ucred *cred; struct thread *td; } */ *ap; { return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, ap->a_fflag, ap->a_cred, NULL, NULL)); } static int ioflags(int ioflags) { int flags = 0; if (ioflags & IO_APPEND) flags |= FAPPEND; if (ioflags & IO_NDELAY) flags |= FNONBLOCK; if (ioflags & IO_SYNC) flags |= (FSYNC | FDSYNC | FRSYNC); return (flags); } static int zfs_freebsd_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL)); } static int zfs_freebsd_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL)); } static int zfs_freebsd_access(ap) struct vop_access_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { vnode_t *vp = ap->a_vp; znode_t *zp = VTOZ(vp); accmode_t accmode; int error = 0; /* * ZFS itself only knowns about VREAD, VWRITE, VEXEC and 
VAPPEND, */ accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); if (accmode != 0) error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); /* * VADMIN has to be handled by vaccess(). */ if (error == 0) { accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); if (accmode != 0) { error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, zp->z_gid, accmode, ap->a_cred, NULL); } } /* * For VEXEC, ensure that at least one execute bit is set for * non-directories. */ if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { error = EACCES; } return (error); } static int zfs_freebsd_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; char nm[NAME_MAX + 1]; ASSERT(cnp->cn_namelen < sizeof(nm)); strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, cnp->cn_cred, cnp->cn_thread, 0)); } static int zfs_cache_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { zfsvfs_t *zfsvfs; zfsvfs = ap->a_dvp->v_mount->mnt_data; if (zfsvfs->z_use_namecache) return (vfs_cache_lookup(ap)); else return (zfs_freebsd_lookup(ap)); } static int zfs_freebsd_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { zfsvfs_t *zfsvfs; struct componentname *cnp = ap->a_cnp; vattr_t *vap = ap->a_vap; int error, mode; ASSERT(cnp->cn_flags & SAVENAME); vattr_init_mask(vap); mode = vap->va_mode & ALLPERMS; zfsvfs = ap->a_dvp->v_mount->mnt_data; error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, ap->a_vpp, cnp->cn_cred, cnp->cn_thread); if (zfsvfs->z_use_namecache && error == 0 && (cnp->cn_flags & MAKEENTRY) != 0) cache_enter(ap->a_dvp, *ap->a_vpp, cnp); return (error); } static int zfs_freebsd_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { ASSERT(ap->a_cnp->cn_flags & SAVENAME); return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_cred)); } static int zfs_freebsd_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { vattr_t *vap = ap->a_vap; ASSERT(ap->a_cnp->cn_flags & SAVENAME); vattr_init_mask(vap); return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, ap->a_cnp->cn_cred)); } static int zfs_freebsd_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; ASSERT(cnp->cn_flags & SAVENAME); return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); } static int zfs_freebsd_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, ap->a_ncookies, ap->a_cookies)); } static int zfs_freebsd_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; int a_waitfor; struct thread *a_td; } */ *ap; { vop_stdfsync(ap); return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); } static int zfs_freebsd_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { vattr_t *vap = 
ap->a_vap; xvattr_t xvap; u_long fflags = 0; int error; xva_init(&xvap); xvap.xva_vattr = *vap; xvap.xva_vattr.va_mask |= AT_XVATTR; /* Convert chflags into ZFS-type flags. */ /* XXX: what about SF_SETTABLE?. */ XVA_SET_REQ(&xvap, XAT_IMMUTABLE); XVA_SET_REQ(&xvap, XAT_APPENDONLY); XVA_SET_REQ(&xvap, XAT_NOUNLINK); XVA_SET_REQ(&xvap, XAT_NODUMP); XVA_SET_REQ(&xvap, XAT_READONLY); XVA_SET_REQ(&xvap, XAT_ARCHIVE); XVA_SET_REQ(&xvap, XAT_SYSTEM); XVA_SET_REQ(&xvap, XAT_HIDDEN); XVA_SET_REQ(&xvap, XAT_REPARSE); XVA_SET_REQ(&xvap, XAT_OFFLINE); XVA_SET_REQ(&xvap, XAT_SPARSE); error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); if (error != 0) return (error); /* Convert ZFS xattr into chflags. */ #define FLAG_CHECK(fflag, xflag, xfield) do { \ if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ fflags |= (fflag); \ } while (0) FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, xvap.xva_xoptattrs.xoa_immutable); FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, xvap.xva_xoptattrs.xoa_appendonly); FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, xvap.xva_xoptattrs.xoa_nounlink); FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, xvap.xva_xoptattrs.xoa_archive); FLAG_CHECK(UF_NODUMP, XAT_NODUMP, xvap.xva_xoptattrs.xoa_nodump); FLAG_CHECK(UF_READONLY, XAT_READONLY, xvap.xva_xoptattrs.xoa_readonly); FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, xvap.xva_xoptattrs.xoa_system); FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, xvap.xva_xoptattrs.xoa_hidden); FLAG_CHECK(UF_REPARSE, XAT_REPARSE, xvap.xva_xoptattrs.xoa_reparse); FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, xvap.xva_xoptattrs.xoa_offline); FLAG_CHECK(UF_SPARSE, XAT_SPARSE, xvap.xva_xoptattrs.xoa_sparse); #undef FLAG_CHECK *vap = xvap.xva_vattr; vap->va_flags = fflags; return (0); } static int zfs_freebsd_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { vnode_t *vp = ap->a_vp; vattr_t *vap = ap->a_vap; cred_t *cred = ap->a_cred; xvattr_t xvap; u_long fflags; uint64_t zflags; vattr_init_mask(vap); vap->va_mask &= ~AT_NOSET; xva_init(&xvap); xvap.xva_vattr = *vap; zflags = VTOZ(vp)->z_pflags; if (vap->va_flags != VNOVAL) { zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; int error; if (zfsvfs->z_use_fuids == B_FALSE) return (EOPNOTSUPP); fflags = vap->va_flags; /* * XXX KDM * We need to figure out whether it makes sense to allow * UF_REPARSE through, since we don't really have other * facilities to handle reparse points and zfs_setattr() * doesn't currently allow setting that attribute anyway. */ if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| UF_OFFLINE|UF_SPARSE)) != 0) return (EOPNOTSUPP); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. * Privileged jail processes behave like privileged non-jail * processes if the PR_ALLOW_CHFLAGS permission bit is set; * otherwise, they behave like unprivileged processes. */ if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) { if (zflags & (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { error = securelevel_gt(cred, 0); if (error != 0) return (error); } } else { /* * Callers may only modify the file flags on objects they * have VADMIN rights for. 
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0)
			return (error);
		if (zflags & (ZFS_IMMUTABLE | ZFS_APPENDONLY |
		    ZFS_NOUNLINK)) {
			return (EPERM);
		}
		if (fflags &
		    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
			return (EPERM);
		}
	}

#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
		    xvap.xva_xoptattrs.xoa_archive);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
		    xvap.xva_xoptattrs.xoa_readonly);
		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
		    xvap.xva_xoptattrs.xoa_system);
		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
		    xvap.xva_xoptattrs.xoa_hidden);
		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
-		    xvap.xva_xoptattrs.xoa_hidden);
+		    xvap.xva_xoptattrs.xoa_reparse);
		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
		    xvap.xva_xoptattrs.xoa_offline);
		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
		    xvap.xva_xoptattrs.xoa_sparse);
#undef	FLAG_CHANGE
	}
	if (vap->va_birthtime.tv_sec != VNOVAL) {
		xvap.xva_vattr.va_mask |= AT_XVATTR;
		XVA_SET_REQ(&xvap, XAT_CREATETIME);
	}
	return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL));
}

static int
zfs_freebsd_rename(ap)
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap;
{
	vnode_t *fdvp = ap->a_fdvp;
	vnode_t *fvp = ap->a_fvp;
	vnode_t *tdvp = ap->a_tdvp;
	vnode_t *tvp = ap->a_tvp;
	int error;

	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));

	error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
	    ap->a_tcnp, ap->a_fcnp->cn_cred);

	vrele(fdvp);
	vrele(fvp);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}

static int
zfs_freebsd_symlink(ap)
	struct vop_symlink_args  /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;

	ASSERT(cnp->cn_flags & SAVENAME);

	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode.
*/ vattr_init_mask(vap); return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, __DECONST(char *, ap->a_target), cnp->cn_cred, cnp->cn_thread)); } static int zfs_freebsd_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); } static int zfs_freebsd_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; vnode_t *vp = ap->a_vp; vnode_t *tdvp = ap->a_tdvp; if (tdvp->v_mount != vp->v_mount) return (EXDEV); ASSERT(cnp->cn_flags & SAVENAME); return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); } static int zfs_freebsd_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; { vnode_t *vp = ap->a_vp; zfs_inactive(vp, ap->a_td->td_ucred, NULL); return (0); } static int zfs_freebsd_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; { vnode_t *vp = ap->a_vp; znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; ASSERT(zp != NULL); /* * z_teardown_inactive_lock protects from a race with * zfs_znode_dmu_fini in zfsvfs_teardown during * force unmount. */ rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); if (zp->z_sa_hdl == NULL) zfs_znode_free(zp); else zfs_zinactive(zp); rw_exit(&zfsvfs->z_teardown_inactive_lock); vp->v_data = NULL; return (0); } static int zfs_freebsd_fid(ap) struct vop_fid_args /* { struct vnode *a_vp; struct fid *a_fid; } */ *ap; { return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); } static int zfs_freebsd_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; register_t *a_retval; } */ *ap; { ulong_t val; int error; error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); if (error == 0) { *ap->a_retval = val; return (error); } if (error != EOPNOTSUPP) return (error); switch (ap->a_name) { case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) { *ap->a_retval = PIPE_BUF; return (0); } return (EINVAL); default: return (vop_stdpathconf(ap)); } } /* * FreeBSD's extended attributes namespace defines file name prefix for ZFS' * extended attribute name: * * NAMESPACE PREFIX * system freebsd:system: * user (none, can be used to access ZFS fsattr(5) attributes * created on Solaris) */ static int zfs_create_attrname(int attrnamespace, const char *name, char *attrname, size_t size) { const char *namespace, *prefix, *suffix; /* We don't allow '/' character in attribute name. */ if (strchr(name, '/') != NULL) return (EINVAL); /* We don't allow attribute names that start with "freebsd:" string. */ if (strncmp(name, "freebsd:", 8) == 0) return (EINVAL); bzero(attrname, size); switch (attrnamespace) { case EXTATTR_NAMESPACE_USER: #if 0 prefix = "freebsd:"; namespace = EXTATTR_NAMESPACE_USER_STRING; suffix = ":"; #else /* * This is the default namespace by which we can access all * attributes created on Solaris. */ prefix = namespace = suffix = ""; #endif break; case EXTATTR_NAMESPACE_SYSTEM: prefix = "freebsd:"; namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; suffix = ":"; break; case EXTATTR_NAMESPACE_EMPTY: default: return (EINVAL); } if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, name) >= size) { return (ENAMETOOLONG); } return (0); } /* * Vnode operating to retrieve a named extended attribute. 
*/ static int zfs_getextattr(struct vop_getextattr_args *ap) /* vop_getextattr { IN struct vnode *a_vp; IN int a_attrnamespace; IN const char *a_name; INOUT struct uio *a_uio; OUT size_t *a_size; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; struct thread *td = ap->a_td; struct nameidata nd; char attrname[255]; struct vattr va; vnode_t *xvp = NULL, *vp; int error, flags; error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); if (error != 0) return (error); error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, sizeof(attrname)); if (error != 0) return (error); ZFS_ENTER(zfsvfs); error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR); if (error != 0) { ZFS_EXIT(zfsvfs); return (error); } flags = FREAD; NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td); error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (error != 0) { ZFS_EXIT(zfsvfs); if (error == ENOENT) error = ENOATTR; return (error); } if (ap->a_size != NULL) { error = VOP_GETATTR(vp, &va, ap->a_cred); if (error == 0) *ap->a_size = (size_t)va.va_size; } else if (ap->a_uio != NULL) error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); VOP_UNLOCK(vp, 0); vn_close(vp, flags, ap->a_cred, td); ZFS_EXIT(zfsvfs); return (error); } /* * Vnode operation to remove a named attribute. */ int zfs_deleteextattr(struct vop_deleteextattr_args *ap) /* vop_deleteextattr { IN struct vnode *a_vp; IN int a_attrnamespace; IN const char *a_name; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; struct thread *td = ap->a_td; struct nameidata nd; char attrname[255]; struct vattr va; vnode_t *xvp = NULL, *vp; int error, flags; error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); if (error != 0) return (error); error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, sizeof(attrname)); if (error != 0) return (error); ZFS_ENTER(zfsvfs); error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR); if (error != 0) { ZFS_EXIT(zfsvfs); return (error); } NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, attrname, xvp, td); error = namei(&nd); vp = nd.ni_vp; if (error != 0) { ZFS_EXIT(zfsvfs); NDFREE(&nd, NDF_ONLY_PNBUF); if (error == ENOENT) error = ENOATTR; return (error); } error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); ZFS_EXIT(zfsvfs); return (error); } /* * Vnode operation to set a named attribute. 
*/ static int zfs_setextattr(struct vop_setextattr_args *ap) /* vop_setextattr { IN struct vnode *a_vp; IN int a_attrnamespace; IN const char *a_name; INOUT struct uio *a_uio; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; struct thread *td = ap->a_td; struct nameidata nd; char attrname[255]; struct vattr va; vnode_t *xvp = NULL, *vp; int error, flags; error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); if (error != 0) return (error); error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, sizeof(attrname)); if (error != 0) return (error); ZFS_ENTER(zfsvfs); error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR | CREATE_XATTR_DIR); if (error != 0) { ZFS_EXIT(zfsvfs); return (error); } flags = FFLAGS(O_WRONLY | O_CREAT); NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td); error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (error != 0) { ZFS_EXIT(zfsvfs); return (error); } VATTR_NULL(&va); va.va_size = 0; error = VOP_SETATTR(vp, &va, ap->a_cred); if (error == 0) VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); VOP_UNLOCK(vp, 0); vn_close(vp, flags, ap->a_cred, td); ZFS_EXIT(zfsvfs); return (error); } /* * Vnode operation to retrieve extended attributes on a vnode. */ static int zfs_listextattr(struct vop_listextattr_args *ap) /* vop_listextattr { IN struct vnode *a_vp; IN int a_attrnamespace; INOUT struct uio *a_uio; OUT size_t *a_size; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; struct thread *td = ap->a_td; struct nameidata nd; char attrprefix[16]; u_char dirbuf[sizeof(struct dirent)]; struct dirent *dp; struct iovec aiov; struct uio auio, *uio = ap->a_uio; size_t *sizep = ap->a_size; size_t plen; vnode_t *xvp = NULL, *vp; int done, error, eof, pos; error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); if (error != 0) return (error); error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, sizeof(attrprefix)); if (error != 0) return (error); plen = strlen(attrprefix); ZFS_ENTER(zfsvfs); if (sizep != NULL) *sizep = 0; error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, LOOKUP_XATTR); if (error != 0) { ZFS_EXIT(zfsvfs); /* * ENOATTR means that the EA directory does not yet exist, * i.e. there are no extended attributes there. */ if (error == ENOATTR) error = 0; return (error); } NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, UIO_SYSSPACE, ".", xvp, td); error = namei(&nd); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (error != 0) { ZFS_EXIT(zfsvfs); return (error); } auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_rw = UIO_READ; auio.uio_offset = 0; do { u_char nlen; aiov.iov_base = (void *)dirbuf; aiov.iov_len = sizeof(dirbuf); auio.uio_resid = sizeof(dirbuf); error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); done = sizeof(dirbuf) - auio.uio_resid; if (error != 0) break; for (pos = 0; pos < done;) { dp = (struct dirent *)(dirbuf + pos); pos += dp->d_reclen; /* * XXX: Temporarily we also accept DT_UNKNOWN, as this * is what we get when attribute was created on Solaris. 
*/ if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) continue; if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) continue; else if (strncmp(dp->d_name, attrprefix, plen) != 0) continue; nlen = dp->d_namlen - plen; if (sizep != NULL) *sizep += 1 + nlen; else if (uio != NULL) { /* * Format of extattr name entry is one byte for * length and the rest for name. */ error = uiomove(&nlen, 1, uio->uio_rw, uio); if (error == 0) { error = uiomove(dp->d_name + plen, nlen, uio->uio_rw, uio); } if (error != 0) break; } } } while (!eof && error == 0); vput(vp); ZFS_EXIT(zfsvfs); return (error); } int zfs_freebsd_getacl(ap) struct vop_getacl_args /* { struct vnode *vp; acl_type_t type; struct acl *aclp; struct ucred *cred; struct thread *td; } */ *ap; { int error; vsecattr_t vsecattr; if (ap->a_type != ACL_TYPE_NFS4) return (EINVAL); vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) return (error); error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); if (vsecattr.vsa_aclentp != NULL) kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); return (error); } int zfs_freebsd_setacl(ap) struct vop_setacl_args /* { struct vnode *vp; acl_type_t type; struct acl *aclp; struct ucred *cred; struct thread *td; } */ *ap; { int error; vsecattr_t vsecattr; int aclbsize; /* size of acl list in bytes */ aclent_t *aaclp; if (ap->a_type != ACL_TYPE_NFS4) return (EINVAL); if (ap->a_aclp == NULL) return (EINVAL); if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) return (EINVAL); /* * With NFSv4 ACLs, chmod(2) may need to add additional entries, * splitting every entry into two and appending "canonical six" * entries at the end. Don't allow for setting an ACL that would * cause chmod(2) to run out of ACL entries. */ if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) return (ENOSPC); error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); if (error != 0) return (error); vsecattr.vsa_mask = VSA_ACE; aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); aaclp = vsecattr.vsa_aclentp; vsecattr.vsa_aclentsz = aclbsize; aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); kmem_free(aaclp, aclbsize); return (error); } int zfs_freebsd_aclcheck(ap) struct vop_aclcheck_args /* { struct vnode *vp; acl_type_t type; struct acl *aclp; struct ucred *cred; struct thread *td; } */ *ap; { return (EOPNOTSUPP); } static int zfs_vptocnp(struct vop_vptocnp_args *ap) { vnode_t *covered_vp; vnode_t *vp = ap->a_vp;; zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; znode_t *zp = VTOZ(vp); enum vgetstate vs; int ltype; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); /* * If we are a snapshot mounted under .zfs, run the operation * on the covered vnode. 
*/ if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { char name[MAXNAMLEN + 1]; znode_t *dzp; size_t len; error = zfs_znode_parent_and_name(zp, &dzp, name); if (error == 0) { len = strlen(name); if (*ap->a_buflen < len) error = SET_ERROR(ENOMEM); } if (error == 0) { *ap->a_buflen -= len; bcopy(name, ap->a_buf + *ap->a_buflen, len); *ap->a_vpp = ZTOV(dzp); } ZFS_EXIT(zfsvfs); return (error); } ZFS_EXIT(zfsvfs); covered_vp = vp->v_mount->mnt_vnodecovered; vs = vget_prep(covered_vp); ltype = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); error = vget_finish(covered_vp, LK_SHARED, vs); if (error == 0) { error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, ap->a_buf, ap->a_buflen); vput(covered_vp); } vn_lock(vp, ltype | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) error = SET_ERROR(ENOENT); return (error); } #ifdef DIAGNOSTIC static int zfs_lock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { vnode_t *vp; znode_t *zp; int err; err = vop_stdlock(ap); if (err == 0 && (ap->a_flags & LK_NOWAIT) == 0) { vp = ap->a_vp; zp = vp->v_data; if (vp->v_mount != NULL && (vp->v_iflag & VI_DOOMED) == 0 && zp != NULL && (zp->z_pflags & ZFS_XATTR) == 0) VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock)); } return (err); } #endif struct vop_vector zfs_vnodeops; struct vop_vector zfs_fifoops; struct vop_vector zfs_shareops; struct vop_vector zfs_vnodeops = { .vop_default = &default_vnodeops, .vop_inactive = zfs_freebsd_inactive, .vop_reclaim = zfs_freebsd_reclaim, .vop_access = zfs_freebsd_access, .vop_allocate = VOP_EINVAL, .vop_lookup = zfs_cache_lookup, .vop_cachedlookup = zfs_freebsd_lookup, .vop_getattr = zfs_freebsd_getattr, .vop_setattr = zfs_freebsd_setattr, .vop_create = zfs_freebsd_create, .vop_mknod = zfs_freebsd_create, .vop_mkdir = zfs_freebsd_mkdir, .vop_readdir = zfs_freebsd_readdir, .vop_fsync = zfs_freebsd_fsync, .vop_open = zfs_freebsd_open, .vop_close = zfs_freebsd_close, .vop_rmdir = zfs_freebsd_rmdir, .vop_ioctl = zfs_freebsd_ioctl, .vop_link = zfs_freebsd_link, .vop_symlink = zfs_freebsd_symlink, .vop_readlink = zfs_freebsd_readlink, .vop_read = zfs_freebsd_read, .vop_write = zfs_freebsd_write, .vop_remove = zfs_freebsd_remove, .vop_rename = zfs_freebsd_rename, .vop_pathconf = zfs_freebsd_pathconf, .vop_bmap = zfs_freebsd_bmap, .vop_fid = zfs_freebsd_fid, .vop_getextattr = zfs_getextattr, .vop_deleteextattr = zfs_deleteextattr, .vop_setextattr = zfs_setextattr, .vop_listextattr = zfs_listextattr, .vop_getacl = zfs_freebsd_getacl, .vop_setacl = zfs_freebsd_setacl, .vop_aclcheck = zfs_freebsd_aclcheck, .vop_getpages = zfs_freebsd_getpages, .vop_putpages = zfs_freebsd_putpages, .vop_vptocnp = zfs_vptocnp, #ifdef DIAGNOSTIC .vop_lock1 = zfs_lock, #endif }; struct vop_vector zfs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = zfs_freebsd_fsync, .vop_access = zfs_freebsd_access, .vop_getattr = zfs_freebsd_getattr, .vop_inactive = zfs_freebsd_inactive, .vop_read = VOP_PANIC, .vop_reclaim = zfs_freebsd_reclaim, .vop_setattr = zfs_freebsd_setattr, .vop_write = VOP_PANIC, .vop_pathconf = zfs_freebsd_pathconf, .vop_fid = zfs_freebsd_fid, .vop_getacl = zfs_freebsd_getacl, .vop_setacl = zfs_freebsd_setacl, .vop_aclcheck = zfs_freebsd_aclcheck, }; /* * special share hidden files vnode operations template */ struct vop_vector zfs_shareops = { .vop_default = &default_vnodeops, .vop_access = zfs_freebsd_access, .vop_inactive = zfs_freebsd_inactive, .vop_reclaim = zfs_freebsd_reclaim, .vop_fid = zfs_freebsd_fid, .vop_pathconf = 
zfs_freebsd_pathconf, }; Index: projects/clang900-import/sys/cddl/contrib/opensolaris =================================================================== --- projects/clang900-import/sys/cddl/contrib/opensolaris (revision 352536) +++ projects/clang900-import/sys/cddl/contrib/opensolaris (revision 352537) Property changes on: projects/clang900-import/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r352308-352536 Index: projects/clang900-import/sys/conf/files.mips =================================================================== --- projects/clang900-import/sys/conf/files.mips (revision 352536) +++ projects/clang900-import/sys/conf/files.mips (revision 352537) @@ -1,115 +1,117 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # Arch dependent files mips/mips/autoconf.c standard mips/mips/bus_space_generic.c standard mips/mips/busdma_machdep.c standard mips/mips/cache.c standard mips/mips/cache_mipsNN.c standard mips/mips/cpu.c standard mips/mips/db_disasm.c optional ddb mips/mips/db_interface.c optional ddb mips/mips/db_trace.c optional ddb mips/mips/dump_machdep.c standard mips/mips/elf_machdep.c standard mips/mips/exception.S standard mips/mips/fp.S standard mips/mips/freebsd32_machdep.c optional compat_freebsd32 mips/mips/gdb_machdep.c standard mips/mips/in_cksum.c optional inet mips/mips/libkern_machdep.c standard mips/mips/locore.S standard no-obj mips/mips/machdep.c standard mips/mips/mem.c optional mem mips/mips/minidump_machdep.c standard mips/mips/mp_machdep.c optional smp mips/mips/mpboot.S optional smp mips/mips/nexus.c standard mips/mips/ofw_machdep.c optional fdt mips/mips/pm_machdep.c standard mips/mips/pmap.c standard mips/mips/ptrace_machdep.c standard mips/mips/sc_machdep.c standard mips/mips/stack_machdep.c optional ddb | stack mips/mips/stdatomic.c standard \ compile-with "${NORMAL_C:N-Wmissing-prototypes}" mips/mips/support.S standard mips/mips/bcopy.S standard mips/mips/swtch.S standard mips/mips/sys_machdep.c standard mips/mips/tlb.c standard mips/mips/trap.c standard mips/mips/uio_machdep.c standard mips/mips/uma_machdep.c standard mips/mips/vm_machdep.c standard # misc opt-in bits kern/kern_clocksource.c standard kern/link_elf_obj.c standard kern/subr_busdma_bufalloc.c standard kern/subr_dummy_vdso_tc.c standard kern/subr_sfbuf.c optional mips | mipsel | mipsn32 kern/subr_sfbuf.c optional mipshf | mipselhf # gcc/clang runtime libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/cmpdi2.c optional mips | mipshf | mipsel | mipselhf libkern/ucmpdi2.c optional mips | mipshf | mipsel | mipselhf libkern/ashldi3.c standard libkern/ashrdi3.c standard libkern/memcmp.c standard # cfe support dev/cfe/cfe_api.c optional cfe dev/cfe/cfe_console.c optional cfe_console dev/cfe/cfe_env.c optional cfe_env # syscons support dev/fb/fb.c optional sc dev/syscons/scgfbrndr.c optional sc mips/mips/sc_machdep.c optional sc # FDT support dev/uart/uart_cpu_fdt.c optional uart fdt # crypto support -- use generic crypto/blowfish/bf_enc.c optional crypto | ipsec | \ ipsec_support crypto/des/des_enc.c optional crypto | ipsec | \ ipsec_support | netsmb # AP common nvram interface MIPS specific, but maybe should be more generic dev/nvram2env/nvram2env_mips.c optional nvram2env dev/nvram2env/nvram2env.c optional 
nvram2env
# hwpmc support
-dev/hwpmc/hwpmc_mips.c		optional	hwpmc
+dev/hwpmc/hwpmc_beri.c		optional	hwpmc_beri
+dev/hwpmc/hwpmc_mips.c		optional	hwpmc_mips24k | \
+					hwpmc_mips74k
dev/hwpmc/hwpmc_mips24k.c	optional	hwpmc_mips24k
dev/hwpmc/hwpmc_mips74k.c	optional	hwpmc_mips74k
# ofw support
dev/ofw/ofwpci.c		optional	fdt pci
# INTRNG support code
kern/msi_if.m			optional	intrng
kern/pic_if.m			optional	intrng
kern/subr_intr.c		optional	intrng
# INTRNG compatible MIPS32 interrupt controller
mips/mips/mips_pic.c		optional	intrng
# DTrace
cddl/compat/opensolaris/kern/opensolaris_atomic.c	optional zfs | dtrace compile-with "${CDDL_C}"
cddl/dev/dtrace/mips/dtrace_asm.S	optional dtrace compile-with "${DTRACE_S}"
cddl/dev/dtrace/mips/dtrace_subr.c	optional dtrace compile-with "${DTRACE_C}"
cddl/dev/fbt/mips/fbt_isa.c	optional dtrace_fbt | dtraceall compile-with "${FBT_C}"
# Zstd
contrib/zstd/lib/freebsd/zstd_kfreebsd.c	optional zstdio compile-with ${ZSTD_C}
Index: projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.c
===================================================================
--- projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.c	(nonexistent)
+++ projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.c	(revision 352537)
@@ -0,0 +1,540 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2019 Ruslan Bukin
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory (Department of Computer Science and
+ * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the
+ * DARPA SSITH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_hwpmc_hooks.h" + +#include +#include + +#include + +#define BERI_NCOUNTERS 56 +#define BERI_PMC_CAPS (PMC_CAP_USER | PMC_CAP_SYSTEM | \ + PMC_CAP_READ | PMC_CAP_WRITE ) + +struct beri_event_code_map { + uint32_t pe_ev; /* enum value */ + uint64_t (*get_func)(void); +}; + +const struct beri_event_code_map beri_event_codes[BERI_NCOUNTERS] = { + { PMC_EV_BERI_CYCLE, + statcounters_get_cycle_count }, + { PMC_EV_BERI_INST, + statcounters_get_inst_count }, + { PMC_EV_BERI_INST_USER, + statcounters_get_inst_user_count }, + { PMC_EV_BERI_INST_KERNEL, + statcounters_get_inst_kernel_count }, + { PMC_EV_BERI_IMPRECISE_SETBOUNDS, + statcounters_get_imprecise_setbounds_count }, + { PMC_EV_BERI_UNREPRESENTABLE_CAPS, + statcounters_get_unrepresentable_caps_count }, + { PMC_EV_BERI_ITLB_MISS, + statcounters_get_itlb_miss_count }, + { PMC_EV_BERI_DTLB_MISS, + statcounters_get_dtlb_miss_count }, + { PMC_EV_BERI_ICACHE_WRITE_HIT, + statcounters_get_icache_write_hit_count }, + { PMC_EV_BERI_ICACHE_WRITE_MISS, + statcounters_get_icache_write_miss_count }, + { PMC_EV_BERI_ICACHE_READ_HIT, + statcounters_get_icache_read_hit_count }, + { PMC_EV_BERI_ICACHE_READ_MISS, + statcounters_get_icache_read_miss_count }, + { PMC_EV_BERI_ICACHE_EVICT, + statcounters_get_icache_evict_count }, + { PMC_EV_BERI_DCACHE_WRITE_HIT, + statcounters_get_dcache_write_hit_count }, + { PMC_EV_BERI_DCACHE_WRITE_MISS, + statcounters_get_dcache_write_miss_count }, + { PMC_EV_BERI_DCACHE_READ_HIT, + statcounters_get_dcache_read_hit_count }, + { PMC_EV_BERI_DCACHE_READ_MISS, + statcounters_get_dcache_read_miss_count }, + { PMC_EV_BERI_DCACHE_EVICT, + statcounters_get_dcache_evict_count }, + { PMC_EV_BERI_DCACHE_SET_TAG_WRITE, + statcounters_get_dcache_set_tag_write_count }, + { PMC_EV_BERI_DCACHE_SET_TAG_READ, + statcounters_get_dcache_set_tag_read_count }, + { PMC_EV_BERI_L2CACHE_WRITE_HIT, + statcounters_get_l2cache_write_hit_count }, + { PMC_EV_BERI_L2CACHE_WRITE_MISS, + statcounters_get_l2cache_write_miss_count }, + { PMC_EV_BERI_L2CACHE_READ_HIT, + statcounters_get_l2cache_read_hit_count }, + { PMC_EV_BERI_L2CACHE_READ_MISS, + statcounters_get_l2cache_read_miss_count }, + { PMC_EV_BERI_L2CACHE_EVICT, + statcounters_get_l2cache_evict_count }, + { PMC_EV_BERI_L2CACHE_SET_TAG_WRITE, + statcounters_get_l2cache_set_tag_write_count }, + { PMC_EV_BERI_L2CACHE_SET_TAG_READ, + statcounters_get_l2cache_set_tag_read_count }, + { PMC_EV_BERI_MEM_BYTE_READ, + statcounters_get_mem_byte_read_count }, + { PMC_EV_BERI_MEM_BYTE_WRITE, + statcounters_get_mem_byte_write_count }, + { PMC_EV_BERI_MEM_HWORD_READ, + statcounters_get_mem_hword_read_count }, + { PMC_EV_BERI_MEM_HWORD_WRITE, + statcounters_get_mem_hword_write_count }, + { PMC_EV_BERI_MEM_WORD_READ, + statcounters_get_mem_word_read_count }, + { PMC_EV_BERI_MEM_WORD_WRITE, + statcounters_get_mem_word_write_count }, + { PMC_EV_BERI_MEM_DWORD_READ, + statcounters_get_mem_dword_read_count }, + { PMC_EV_BERI_MEM_DWORD_WRITE, + statcounters_get_mem_dword_write_count }, + { PMC_EV_BERI_MEM_CAP_READ, + statcounters_get_mem_cap_read_count }, + { PMC_EV_BERI_MEM_CAP_WRITE, + statcounters_get_mem_cap_write_count }, + { PMC_EV_BERI_MEM_CAP_READ_TAG_SET, + statcounters_get_mem_cap_read_tag_set_count }, + { PMC_EV_BERI_MEM_CAP_WRITE_TAG_SET, + statcounters_get_mem_cap_write_tag_set_count }, + { PMC_EV_BERI_TAGCACHE_WRITE_HIT, + statcounters_get_tagcache_write_hit_count }, + { PMC_EV_BERI_TAGCACHE_WRITE_MISS, + statcounters_get_tagcache_write_miss_count 
}, + { PMC_EV_BERI_TAGCACHE_READ_HIT, + statcounters_get_tagcache_read_hit_count }, + { PMC_EV_BERI_TAGCACHE_READ_MISS, + statcounters_get_tagcache_read_miss_count }, + { PMC_EV_BERI_TAGCACHE_EVICT, + statcounters_get_tagcache_evict_count }, + { PMC_EV_BERI_L2CACHEMASTER_READ_REQ, + statcounters_get_l2cachemaster_read_req_count }, + { PMC_EV_BERI_L2CACHEMASTER_WRITE_REQ, + statcounters_get_l2cachemaster_write_req_count }, + { PMC_EV_BERI_L2CACHEMASTER_WRITE_REQ_FLIT, + statcounters_get_l2cachemaster_write_req_flit_count }, + { PMC_EV_BERI_L2CACHEMASTER_READ_RSP, + statcounters_get_l2cachemaster_read_rsp_count }, + { PMC_EV_BERI_L2CACHEMASTER_READ_RSP_FLIT, + statcounters_get_l2cachemaster_read_rsp_flit_count }, + { PMC_EV_BERI_L2CACHEMASTER_WRITE_RSP, + statcounters_get_l2cachemaster_write_rsp_count }, + { PMC_EV_BERI_TAGCACHEMASTER_READ_REQ, + statcounters_get_tagcachemaster_read_req_count }, + { PMC_EV_BERI_TAGCACHEMASTER_WRITE_REQ, + statcounters_get_tagcachemaster_write_req_count }, + { PMC_EV_BERI_TAGCACHEMASTER_WRITE_REQ_FLIT, + statcounters_get_tagcachemaster_write_req_flit_count }, + { PMC_EV_BERI_TAGCACHEMASTER_READ_RSP, + statcounters_get_tagcachemaster_read_rsp_count }, + { PMC_EV_BERI_TAGCACHEMASTER_READ_RSP_FLIT, + statcounters_get_tagcachemaster_read_rsp_flit_count }, + { PMC_EV_BERI_TAGCACHEMASTER_WRITE_RSP, + statcounters_get_tagcachemaster_write_rsp_count }, +}; + +struct mips_pmc_spec beri_pmc_spec = { + .ps_cpuclass = PMC_CLASS_BERI, + .ps_cputype = PMC_CPU_MIPS_BERI, + .ps_capabilities = BERI_PMC_CAPS, + .ps_counter_width = 64 +}; + +/* + * Per-processor information. + */ +struct beri_cpu { + struct pmc_hw *pc_beripmcs; + uint64_t start_values[BERI_NCOUNTERS]; + uint64_t stop_values[BERI_NCOUNTERS]; + uint64_t saved_values[BERI_NCOUNTERS]; +}; + +int beri_npmcs; +static struct beri_cpu **beri_pcpu; + +static int +beri_allocate_pmc(int cpu, int ri, struct pmc *pm, + const struct pmc_op_pmcallocate *a) +{ + uint32_t config; + int i; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row index %d", __LINE__, ri)); + + if (a->pm_class != beri_pmc_spec.ps_cpuclass) + return (EINVAL); + + for (i = 0; i < BERI_NCOUNTERS; i++) { + if (beri_event_codes[i].pe_ev == a->pm_ev) { + config = i; + break; + } + } + + if (i == BERI_NCOUNTERS) + return (EINVAL); + + pm->pm_md.pm_mips_evsel = config; + + PMCDBG2(MDP,ALL,2,"beri-allocate ri=%d -> config=0x%x", ri, config); + + return (0); +} + +static int +beri_read_pmc(int cpu, int ri, pmc_value_t *v) +{ + uint32_t config; + struct pmc *pm; + pmc_value_t new; + pmc_value_t start_val; + pmc_value_t stop_val; + pmc_value_t saved_val; + pmc_value_t result; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row index %d", __LINE__, ri)); + + pm = beri_pcpu[cpu]->pc_beripmcs[ri].phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + start_val = beri_pcpu[cpu]->start_values[config]; + if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { + stop_val = beri_event_codes[config].get_func(); + } else + stop_val = beri_pcpu[cpu]->stop_values[config]; + + if (start_val <= stop_val) + result = stop_val - start_val; + else { + if (config == 0) /* CYCLE counter is 48 bit */ + result = 0x00ffffffffffffffUL; + else + result = 0xffffffffffffffffUL; + result -= start_val; + result += stop_val; + } + + saved_val = beri_pcpu[cpu]->saved_values[config]; + + *v = 
result + saved_val; + + return (0); +} + +static int +beri_write_pmc(int cpu, int ri, pmc_value_t v) +{ + struct pmc *pm; + uint32_t config; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row-index %d", __LINE__, ri)); + + pm = beri_pcpu[cpu]->pc_beripmcs[ri].phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + v = (1UL << (beri_pmc_spec.ps_counter_width - 1)) - v; + + PMCDBG3(MDP,WRI,1,"beri-write cpu=%d ri=%d v=%jx", cpu, ri, v); + + if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) + beri_pcpu[cpu]->saved_values[config] = 0; + else + beri_pcpu[cpu]->saved_values[config] = v; + + return (0); +} + +static int +beri_config_pmc(int cpu, int ri, struct pmc *pm) +{ + struct pmc_hw *phw; + + PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row-index %d", __LINE__, ri)); + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + + KASSERT(pm == NULL || phw->phw_pmc == NULL, + ("[beri,%d] pm=%p phw->pm=%p hwpmc not unconfigured", + __LINE__, pm, phw->phw_pmc)); + + phw->phw_pmc = pm; + + return (0); +} + +static int +beri_start_pmc(int cpu, int ri) +{ + uint32_t config; + struct pmc *pm; + struct pmc_hw *phw; + pmc_value_t v; + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + pm = phw->phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + v = beri_event_codes[config].get_func(); + beri_pcpu[cpu]->start_values[config] = v; + + return (0); +} + +static int +beri_stop_pmc(int cpu, int ri) +{ + uint32_t config; + struct pmc *pm; + struct pmc_hw *phw; + pmc_value_t v; + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + pm = phw->phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + v = beri_event_codes[config].get_func(); + beri_pcpu[cpu]->stop_values[config] = v; + + return (0); +} + +static int +beri_release_pmc(int cpu, int ri, struct pmc *pmc) +{ + struct pmc_hw *phw; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row-index %d", __LINE__, ri)); + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + KASSERT(phw->phw_pmc == NULL, + ("[beri,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc)); + + return (0); +} + +static int +beri_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) +{ + struct pmc_hw *phw; + char beri_name[PMC_NAME_MAX]; + int error; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d], illegal CPU %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] row-index %d out of range", __LINE__, ri)); + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + snprintf(beri_name, sizeof(beri_name), "MIPS-%d", ri); + if ((error = copystr(beri_name, pi->pm_name, PMC_NAME_MAX, + NULL)) != 0) + return error; + pi->pm_class = beri_pmc_spec.ps_cpuclass; + if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { + pi->pm_enabled = TRUE; + *ppmc = phw->phw_pmc; + } else { + pi->pm_enabled = FALSE; + *ppmc = NULL; + } + + return (0); +} + +static int +beri_get_config(int cpu, int ri, struct pmc **ppm) +{ + + *ppm = beri_pcpu[cpu]->pc_beripmcs[ri].phw_pmc; + + return (0); +} + +static int +beri_pmc_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) +{ + + return (0); +} + +static int +beri_pmc_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) +{ + + return (0); +} + +static int +beri_pcpu_init(struct 
pmc_mdep *md, int cpu) +{ + int first_ri, i; + struct pmc_cpu *pc; + struct beri_cpu *pac; + struct pmc_hw *phw; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] wrong cpu number %d", __LINE__, cpu)); + PMCDBG1(MDP,INI,1,"beri-init cpu=%d", cpu); + + beri_pcpu[cpu] = pac = malloc(sizeof(struct beri_cpu), M_PMC, + M_WAITOK|M_ZERO); + pac->pc_beripmcs = malloc(sizeof(struct pmc_hw) * beri_npmcs, + M_PMC, M_WAITOK|M_ZERO); + pc = pmc_pcpu[cpu]; + first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_MIPS].pcd_ri; + KASSERT(pc != NULL, ("[beri,%d] NULL per-cpu pointer", __LINE__)); + + for (i = 0, phw = pac->pc_beripmcs; i < beri_npmcs; i++, phw++) { + phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | + PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(i); + phw->phw_pmc = NULL; + pc->pc_hwpmcs[i + first_ri] = phw; + } + + return (0); +} + +static int +beri_pcpu_fini(struct pmc_mdep *md, int cpu) +{ + + return (0); +} + +struct pmc_mdep * +pmc_beri_initialize() +{ + struct pmc_mdep *pmc_mdep; + struct pmc_classdep *pcd; + + snprintf(pmc_cpuid, sizeof(pmc_cpuid), "beri"); + + beri_npmcs = 2; + + PMCDBG1(MDP,INI,1,"beri-init npmcs=%d", beri_npmcs); + + /* + * Allocate space for pointers to PMC HW descriptors and for + * the MDEP structure used by MI code. + */ + beri_pcpu = malloc(sizeof(struct beri_cpu *) * pmc_cpu_max(), M_PMC, + M_WAITOK|M_ZERO); + + /* Just one class */ + pmc_mdep = pmc_mdep_alloc(1); + + pmc_mdep->pmd_cputype = beri_pmc_spec.ps_cputype; + + pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_MIPS]; + pcd->pcd_caps = beri_pmc_spec.ps_capabilities; + pcd->pcd_class = beri_pmc_spec.ps_cpuclass; + pcd->pcd_num = beri_npmcs; + pcd->pcd_ri = pmc_mdep->pmd_npmc; + pcd->pcd_width = beri_pmc_spec.ps_counter_width; + + pcd->pcd_allocate_pmc = beri_allocate_pmc; + pcd->pcd_config_pmc = beri_config_pmc; + pcd->pcd_pcpu_fini = beri_pcpu_fini; + pcd->pcd_pcpu_init = beri_pcpu_init; + pcd->pcd_describe = beri_describe; + pcd->pcd_get_config = beri_get_config; + pcd->pcd_read_pmc = beri_read_pmc; + pcd->pcd_release_pmc = beri_release_pmc; + pcd->pcd_start_pmc = beri_start_pmc; + pcd->pcd_stop_pmc = beri_stop_pmc; + pcd->pcd_write_pmc = beri_write_pmc; + + pmc_mdep->pmd_intr = NULL; + pmc_mdep->pmd_switch_in = beri_pmc_switch_in; + pmc_mdep->pmd_switch_out = beri_pmc_switch_out; + + pmc_mdep->pmd_npmc += beri_npmcs; + + return (pmc_mdep); +} + +void +pmc_beri_finalize(struct pmc_mdep *md) +{ + +} + +struct pmc_mdep * +pmc_md_initialize() +{ + + return (pmc_beri_initialize()); +} + +void +pmc_md_finalize(struct pmc_mdep *md) +{ + + return (pmc_beri_finalize(md)); +} + +int +pmc_save_kernel_callchain(uintptr_t *cc, int nframes, + struct trapframe *tf) +{ + + return (0); +} + +int +pmc_save_user_callchain(uintptr_t *cc, int nframes, + struct trapframe *tf) +{ + + return (0); +} Property changes on: projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.h =================================================================== --- projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.h (nonexistent) +++ projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.h (revision 352537) @@ -0,0 +1,107 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 
2019 Alex Richardson + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory (Department of Computer Science and + * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the + * DARPA SSITH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _DEV_HWPMC_HWPMC_BERI_H_ +#define _DEV_HWPMC_HWPMC_BERI_H_ + +#define STATCOUNTER_ITEM(name, X, Y) \ +static inline uint64_t statcounters_get_##name##_count(void) \ +{ \ + uint64_t ret; \ + __asm __volatile( \ + ".word (0x1f << 26) | (0x0 << 21) | \ + (12 << 16) | ("#X" << 11) | \ + ( "#Y" << 6) | 0x3b\n\t" \ + "move %0,$12" : "=r" (ret) :: "$12"); \ + return (ret); \ +} + +STATCOUNTER_ITEM(cycle,2,0) +STATCOUNTER_ITEM(inst,4,0) +STATCOUNTER_ITEM(inst_user,4,1) +STATCOUNTER_ITEM(inst_kernel,4,2) +STATCOUNTER_ITEM(imprecise_setbounds,4,3) +STATCOUNTER_ITEM(unrepresentable_caps,4,4) +STATCOUNTER_ITEM(itlb_miss,5,0) +STATCOUNTER_ITEM(dtlb_miss,6,0) +STATCOUNTER_ITEM(icache_write_hit,8,0) +STATCOUNTER_ITEM(icache_write_miss,8,1) +STATCOUNTER_ITEM(icache_read_hit,8,2) +STATCOUNTER_ITEM(icache_read_miss,8,3) +STATCOUNTER_ITEM(icache_evict,8,6) +STATCOUNTER_ITEM(dcache_write_hit,9,0) +STATCOUNTER_ITEM(dcache_write_miss,9,1) +STATCOUNTER_ITEM(dcache_read_hit,9,2) +STATCOUNTER_ITEM(dcache_read_miss,9,3) +STATCOUNTER_ITEM(dcache_evict,9,6) +STATCOUNTER_ITEM(dcache_set_tag_write,9,8) +STATCOUNTER_ITEM(dcache_set_tag_read,9,9) +STATCOUNTER_ITEM(l2cache_write_hit,10,0) +STATCOUNTER_ITEM(l2cache_write_miss,10,1) +STATCOUNTER_ITEM(l2cache_read_hit,10,2) +STATCOUNTER_ITEM(l2cache_read_miss,10,3) +STATCOUNTER_ITEM(l2cache_evict,10,6) +STATCOUNTER_ITEM(l2cache_set_tag_write,10,8) +STATCOUNTER_ITEM(l2cache_set_tag_read,10,9) +STATCOUNTER_ITEM(mem_byte_read,11,0) +STATCOUNTER_ITEM(mem_byte_write,11,1) +STATCOUNTER_ITEM(mem_hword_read,11,2) +STATCOUNTER_ITEM(mem_hword_write,11,3) +STATCOUNTER_ITEM(mem_word_read,11,4) +STATCOUNTER_ITEM(mem_word_write,11,5) +STATCOUNTER_ITEM(mem_dword_read,11,6) +STATCOUNTER_ITEM(mem_dword_write,11,7) +STATCOUNTER_ITEM(mem_cap_read,11,8) +STATCOUNTER_ITEM(mem_cap_write,11,9) +STATCOUNTER_ITEM(mem_cap_read_tag_set,11,10) +STATCOUNTER_ITEM(mem_cap_write_tag_set,11,11) 
+STATCOUNTER_ITEM(tagcache_write_hit,12,0) +STATCOUNTER_ITEM(tagcache_write_miss,12,1) +STATCOUNTER_ITEM(tagcache_read_hit,12,2) +STATCOUNTER_ITEM(tagcache_read_miss,12,3) +STATCOUNTER_ITEM(tagcache_evict,12,6) +STATCOUNTER_ITEM(l2cachemaster_read_req,13,0) +STATCOUNTER_ITEM(l2cachemaster_write_req,13,1) +STATCOUNTER_ITEM(l2cachemaster_write_req_flit,13,2) +STATCOUNTER_ITEM(l2cachemaster_read_rsp,13,3) +STATCOUNTER_ITEM(l2cachemaster_read_rsp_flit,13,4) +STATCOUNTER_ITEM(l2cachemaster_write_rsp,13,5) +STATCOUNTER_ITEM(tagcachemaster_read_req,14,0) +STATCOUNTER_ITEM(tagcachemaster_write_req,14,1) +STATCOUNTER_ITEM(tagcachemaster_write_req_flit,14,2) +STATCOUNTER_ITEM(tagcachemaster_read_rsp,14,3) +STATCOUNTER_ITEM(tagcachemaster_read_rsp_flit,14,4) +STATCOUNTER_ITEM(tagcachemaster_write_rsp,14,5) + +#endif /* !_DEV_HWPMC_HWPMC_BERI_H_ */ Property changes on: projects/clang900-import/sys/dev/hwpmc/hwpmc_beri.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang900-import/sys/dev/hwpmc/pmc_events.h =================================================================== --- projects/clang900-import/sys/dev/hwpmc/pmc_events.h (revision 352536) +++ projects/clang900-import/sys/dev/hwpmc/pmc_events.h (revision 352537) @@ -1,1817 +1,1881 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _DEV_HWPMC_PMC_EVENTS_H_ #define _DEV_HWPMC_PMC_EVENTS_H_ /* * Note: Documentation on adding events can be found both in * the source tree at src/share/doc/papers/hwpmc/hwpmc.ms * as well as on-line at: * * https://wiki.freebsd.org/PmcTools/PmcHardwareHowTo * * Please refer to those resources before you attempt to modify * this file or the hwpmc driver/subsystem. */ /* * PMC event codes. * * __PMC_EV(CLASS, SYMBOLIC-NAME) * */ /* timestamp counters. */ #define __PMC_EV_TSC() \ __PMC_EV(TSC, TSC) #define PMC_EV_TSC_FIRST PMC_EV_TSC_TSC #define PMC_EV_TSC_LAST PMC_EV_TSC_TSC /* * Software events are dynamically defined. 
*/ #define PMC_EV_DYN_COUNT 0x1000 #define PMC_EV_SOFT_FIRST 0x20000 #define PMC_EV_SOFT_LAST (PMC_EV_SOFT_FIRST + PMC_EV_DYN_COUNT - 1) /* * AMD K7 Events, from "The AMD Athlon(tm) Processor x86 Code * Optimization Guide" [Doc#22007K, Feb 2002] */ #define __PMC_EV_K7() \ __PMC_EV(K7, DC_ACCESSES) \ __PMC_EV(K7, DC_MISSES) \ __PMC_EV(K7, DC_REFILLS_FROM_L2) \ __PMC_EV(K7, DC_REFILLS_FROM_SYSTEM) \ __PMC_EV(K7, DC_WRITEBACKS) \ __PMC_EV(K7, L1_DTLB_MISS_AND_L2_DTLB_HITS) \ __PMC_EV(K7, L1_AND_L2_DTLB_MISSES) \ __PMC_EV(K7, MISALIGNED_REFERENCES) \ __PMC_EV(K7, IC_FETCHES) \ __PMC_EV(K7, IC_MISSES) \ __PMC_EV(K7, L1_ITLB_MISSES) \ __PMC_EV(K7, L1_L2_ITLB_MISSES) \ __PMC_EV(K7, RETIRED_INSTRUCTIONS) \ __PMC_EV(K7, RETIRED_OPS) \ __PMC_EV(K7, RETIRED_BRANCHES) \ __PMC_EV(K7, RETIRED_BRANCHES_MISPREDICTED) \ __PMC_EV(K7, RETIRED_TAKEN_BRANCHES) \ __PMC_EV(K7, RETIRED_TAKEN_BRANCHES_MISPREDICTED) \ __PMC_EV(K7, RETIRED_FAR_CONTROL_TRANSFERS) \ __PMC_EV(K7, RETIRED_RESYNC_BRANCHES) \ __PMC_EV(K7, INTERRUPTS_MASKED_CYCLES) \ __PMC_EV(K7, INTERRUPTS_MASKED_WHILE_PENDING_CYCLES) \ __PMC_EV(K7, HARDWARE_INTERRUPTS) #define PMC_EV_K7_FIRST PMC_EV_K7_DC_ACCESSES #define PMC_EV_K7_LAST PMC_EV_K7_HARDWARE_INTERRUPTS /* AMD K8 PMCs */ #define __PMC_EV_K8() \ __PMC_EV(K8, FP_DISPATCHED_FPU_OPS) \ __PMC_EV(K8, FP_CYCLES_WITH_NO_FPU_OPS_RETIRED) \ __PMC_EV(K8, FP_DISPATCHED_FPU_FAST_FLAG_OPS) \ __PMC_EV(K8, LS_SEGMENT_REGISTER_LOAD) \ __PMC_EV(K8, LS_MICROARCHITECTURAL_RESYNC_BY_SELF_MODIFYING_CODE) \ __PMC_EV(K8, LS_MICROARCHITECTURAL_RESYNC_BY_SNOOP) \ __PMC_EV(K8, LS_BUFFER2_FULL) \ __PMC_EV(K8, LS_LOCKED_OPERATION) \ __PMC_EV(K8, LS_MICROARCHITECTURAL_LATE_CANCEL) \ __PMC_EV(K8, LS_RETIRED_CFLUSH_INSTRUCTIONS) \ __PMC_EV(K8, LS_RETIRED_CPUID_INSTRUCTIONS) \ __PMC_EV(K8, DC_ACCESS) \ __PMC_EV(K8, DC_MISS) \ __PMC_EV(K8, DC_REFILL_FROM_L2) \ __PMC_EV(K8, DC_REFILL_FROM_SYSTEM) \ __PMC_EV(K8, DC_COPYBACK) \ __PMC_EV(K8, DC_L1_DTLB_MISS_AND_L2_DTLB_HIT) \ __PMC_EV(K8, DC_L1_DTLB_MISS_AND_L2_DTLB_MISS) \ __PMC_EV(K8, DC_MISALIGNED_DATA_REFERENCE) \ __PMC_EV(K8, DC_MICROARCHITECTURAL_LATE_CANCEL) \ __PMC_EV(K8, DC_MICROARCHITECTURAL_EARLY_CANCEL) \ __PMC_EV(K8, DC_ONE_BIT_ECC_ERROR) \ __PMC_EV(K8, DC_DISPATCHED_PREFETCH_INSTRUCTIONS) \ __PMC_EV(K8, DC_DCACHE_ACCESSES_BY_LOCKS) \ __PMC_EV(K8, BU_CPU_CLK_UNHALTED) \ __PMC_EV(K8, BU_INTERNAL_L2_REQUEST) \ __PMC_EV(K8, BU_FILL_REQUEST_L2_MISS) \ __PMC_EV(K8, BU_FILL_INTO_L2) \ __PMC_EV(K8, IC_FETCH) \ __PMC_EV(K8, IC_MISS) \ __PMC_EV(K8, IC_REFILL_FROM_L2) \ __PMC_EV(K8, IC_REFILL_FROM_SYSTEM) \ __PMC_EV(K8, IC_L1_ITLB_MISS_AND_L2_ITLB_HIT) \ __PMC_EV(K8, IC_L1_ITLB_MISS_AND_L2_ITLB_MISS) \ __PMC_EV(K8, IC_MICROARCHITECTURAL_RESYNC_BY_SNOOP) \ __PMC_EV(K8, IC_INSTRUCTION_FETCH_STALL) \ __PMC_EV(K8, IC_RETURN_STACK_HIT) \ __PMC_EV(K8, IC_RETURN_STACK_OVERFLOW) \ __PMC_EV(K8, FR_RETIRED_X86_INSTRUCTIONS) \ __PMC_EV(K8, FR_RETIRED_UOPS) \ __PMC_EV(K8, FR_RETIRED_BRANCHES) \ __PMC_EV(K8, FR_RETIRED_BRANCHES_MISPREDICTED) \ __PMC_EV(K8, FR_RETIRED_TAKEN_BRANCHES) \ __PMC_EV(K8, FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED) \ __PMC_EV(K8, FR_RETIRED_FAR_CONTROL_TRANSFERS) \ __PMC_EV(K8, FR_RETIRED_RESYNCS) \ __PMC_EV(K8, FR_RETIRED_NEAR_RETURNS) \ __PMC_EV(K8, FR_RETIRED_NEAR_RETURNS_MISPREDICTED) \ __PMC_EV(K8, FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED_BY_ADDR_MISCOMPARE) \ __PMC_EV(K8, FR_RETIRED_FPU_INSTRUCTIONS) \ __PMC_EV(K8, FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS) \ __PMC_EV(K8, FR_INTERRUPTS_MASKED_CYCLES) \ __PMC_EV(K8, FR_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES) \ 
__PMC_EV(K8, FR_TAKEN_HARDWARE_INTERRUPTS) \ __PMC_EV(K8, FR_DECODER_EMPTY) \ __PMC_EV(K8, FR_DISPATCH_STALLS) \ __PMC_EV(K8, FR_DISPATCH_STALL_FROM_BRANCH_ABORT_TO_RETIRE) \ __PMC_EV(K8, FR_DISPATCH_STALL_FOR_SERIALIZATION) \ __PMC_EV(K8, FR_DISPATCH_STALL_FOR_SEGMENT_LOAD) \ __PMC_EV(K8, FR_DISPATCH_STALL_WHEN_REORDER_BUFFER_IS_FULL) \ __PMC_EV(K8, FR_DISPATCH_STALL_WHEN_RESERVATION_STATIONS_ARE_FULL) \ __PMC_EV(K8, FR_DISPATCH_STALL_WHEN_FPU_IS_FULL) \ __PMC_EV(K8, FR_DISPATCH_STALL_WHEN_LS_IS_FULL) \ __PMC_EV(K8, FR_DISPATCH_STALL_WHEN_WAITING_FOR_ALL_TO_BE_QUIET) \ __PMC_EV(K8, FR_DISPATCH_STALL_WHEN_FAR_XFER_OR_RESYNC_BRANCH_PENDING) \ __PMC_EV(K8, FR_FPU_EXCEPTIONS) \ __PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR0) \ __PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR1) \ __PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR2) \ __PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR3) \ __PMC_EV(K8, NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT) \ __PMC_EV(K8, NB_MEMORY_CONTROLLER_PAGE_TABLE_OVERFLOW) \ __PMC_EV(K8, NB_MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED) \ __PMC_EV(K8, NB_MEMORY_CONTROLLER_TURNAROUND) \ __PMC_EV(K8, NB_MEMORY_CONTROLLER_BYPASS_SATURATION) \ __PMC_EV(K8, NB_SIZED_COMMANDS) \ __PMC_EV(K8, NB_PROBE_RESULT) \ __PMC_EV(K8, NB_HT_BUS0_BANDWIDTH) \ __PMC_EV(K8, NB_HT_BUS1_BANDWIDTH) \ __PMC_EV(K8, NB_HT_BUS2_BANDWIDTH) #define PMC_EV_K8_FIRST PMC_EV_K8_FP_DISPATCHED_FPU_OPS #define PMC_EV_K8_LAST PMC_EV_K8_NB_HT_BUS2_BANDWIDTH /* * Events supported by Intel architectural fixed function counters, * from the "Intel 64 and IA-32 Architectures Software Developer's * Manual Volume 3B: System Programming Guide, Part 2", July 2008. */ #define __PMC_EV_IAF() \ __PMC_EV(IAF, INSTR_RETIRED_ANY) \ __PMC_EV(IAF, CPU_CLK_UNHALTED_CORE) \ __PMC_EV(IAF, CPU_CLK_UNHALTED_REF) #define PMC_EV_IAF_FIRST PMC_EV_IAF_INSTR_RETIRED_ANY #define PMC_EV_IAF_LAST PMC_EV_IAF_CPU_CLK_UNHALTED_REF #define __PMC_EV_ALIAS_IAF() \ __PMC_EV_ALIAS("instruction-retired", IAF_INSTR_RETIRED_ANY) \ __PMC_EV_ALIAS("unhalted-core-cycles", IAF_CPU_CLK_UNHALTED_CORE) \ __PMC_EV_ALIAS("unhalted-reference-cycles", IAF_CPU_CLK_UNHALTED_REF) #define PMC_EV_IAP_FIRST PMC_EV_IAP_ARCH_BR_INS_RET #define PMC_EV_IAP_LAST PMC_EV_IAP_EVENT_FDH_40H /* * Map "architectural" event names to event ids. */ #define __PMC_EV_ALIAS_INTEL_ARCHITECTURAL() \ __PMC_EV_ALIAS("branch-instruction-retired", IAP_ARCH_BR_INS_RET) \ __PMC_EV_ALIAS("branch-misses-retired", IAP_ARCH_BR_MIS_RET) \ __PMC_EV_ALIAS("instruction-retired", IAP_ARCH_INS_RET) \ __PMC_EV_ALIAS("llc-misses", IAP_ARCH_LLC_MIS) \ __PMC_EV_ALIAS("llc-reference", IAP_ARCH_LLC_REF) \ __PMC_EV_ALIAS("unhalted-reference-cycles", IAP_ARCH_UNH_REF_CYC) \ __PMC_EV_ALIAS("unhalted-core-cycles", IAP_ARCH_UNH_COR_CYC) #define __PMC_EV_UCP() \ __PMC_EV(UCP, EVENT_0CH_04H_E) \ __PMC_EV(UCP, EVENT_0CH_04H_F) \ __PMC_EV(UCP, EVENT_0CH_04H_M) \ __PMC_EV(UCP, EVENT_0CH_04H_S) \ __PMC_EV(UCP, EVENT_0CH_08H_E) \ __PMC_EV(UCP, EVENT_0CH_08H_F) \ __PMC_EV(UCP, EVENT_0CH_08H_M) \ __PMC_EV(UCP, EVENT_0CH_08H_S) \ /* * Intel XScale events from: * * Intel XScale Core Developer's Manual * January, 2004, #27347302 * * 3rd Generation Intel XScale Microarchitecture * Developer's Manual * May 2007, #31628302 * * First 14 events are for 1st and 2nd Generation Intel XScale cores. The * remaining are available only on 3rd Generation Intel XScale cores. 
*/ #define __PMC_EV_XSCALE() \ __PMC_EV(XSCALE, IC_FETCH) \ __PMC_EV(XSCALE, IC_MISS) \ __PMC_EV(XSCALE, DATA_DEPENDENCY_STALLED) \ __PMC_EV(XSCALE, ITLB_MISS) \ __PMC_EV(XSCALE, DTLB_MISS) \ __PMC_EV(XSCALE, BRANCH_RETIRED) \ __PMC_EV(XSCALE, BRANCH_MISPRED) \ __PMC_EV(XSCALE, INSTR_RETIRED) \ __PMC_EV(XSCALE, DC_FULL_CYCLE) \ __PMC_EV(XSCALE, DC_FULL_CONTIG) \ __PMC_EV(XSCALE, DC_ACCESS) \ __PMC_EV(XSCALE, DC_MISS) \ __PMC_EV(XSCALE, DC_WRITEBACK) \ __PMC_EV(XSCALE, PC_CHANGE) \ __PMC_EV(XSCALE, BRANCH_RETIRED_ALL) \ __PMC_EV(XSCALE, INSTR_CYCLE) \ __PMC_EV(XSCALE, CP_STALL) \ __PMC_EV(XSCALE, PC_CHANGE_ALL) \ __PMC_EV(XSCALE, PIPELINE_FLUSH) \ __PMC_EV(XSCALE, BACKEND_STALL) \ __PMC_EV(XSCALE, MULTIPLIER_USE) \ __PMC_EV(XSCALE, MULTIPLIER_STALLED) \ __PMC_EV(XSCALE, DATA_CACHE_STALLED) \ __PMC_EV(XSCALE, L2_CACHE_REQ) \ __PMC_EV(XSCALE, L2_CACHE_MISS) \ __PMC_EV(XSCALE, ADDRESS_BUS_TRANS) \ __PMC_EV(XSCALE, SELF_ADDRESS_BUS_TRANS) \ __PMC_EV(XSCALE, DATA_BUS_TRANS) #define PMC_EV_XSCALE_FIRST PMC_EV_XSCALE_IC_FETCH #define PMC_EV_XSCALE_LAST PMC_EV_XSCALE_DATA_BUS_TRANS /* * ARMv7 Events */ #define __PMC_EV_ARMV7() \ __PMC_EV(ARMV7, EVENT_00H) \ __PMC_EV(ARMV7, EVENT_01H) \ __PMC_EV(ARMV7, EVENT_02H) \ __PMC_EV(ARMV7, EVENT_03H) \ __PMC_EV(ARMV7, EVENT_04H) \ __PMC_EV(ARMV7, EVENT_05H) \ __PMC_EV(ARMV7, EVENT_06H) \ __PMC_EV(ARMV7, EVENT_07H) \ __PMC_EV(ARMV7, EVENT_08H) \ __PMC_EV(ARMV7, EVENT_09H) \ __PMC_EV(ARMV7, EVENT_0AH) \ __PMC_EV(ARMV7, EVENT_0BH) \ __PMC_EV(ARMV7, EVENT_0CH) \ __PMC_EV(ARMV7, EVENT_0DH) \ __PMC_EV(ARMV7, EVENT_0EH) \ __PMC_EV(ARMV7, EVENT_0FH) \ __PMC_EV(ARMV7, EVENT_10H) \ __PMC_EV(ARMV7, EVENT_11H) \ __PMC_EV(ARMV7, EVENT_12H) \ __PMC_EV(ARMV7, EVENT_13H) \ __PMC_EV(ARMV7, EVENT_14H) \ __PMC_EV(ARMV7, EVENT_15H) \ __PMC_EV(ARMV7, EVENT_16H) \ __PMC_EV(ARMV7, EVENT_17H) \ __PMC_EV(ARMV7, EVENT_18H) \ __PMC_EV(ARMV7, EVENT_19H) \ __PMC_EV(ARMV7, EVENT_1AH) \ __PMC_EV(ARMV7, EVENT_1BH) \ __PMC_EV(ARMV7, EVENT_1CH) \ __PMC_EV(ARMV7, EVENT_1DH) \ __PMC_EV(ARMV7, EVENT_1EH) \ __PMC_EV(ARMV7, EVENT_1FH) \ __PMC_EV(ARMV7, EVENT_20H) \ __PMC_EV(ARMV7, EVENT_21H) \ __PMC_EV(ARMV7, EVENT_22H) \ __PMC_EV(ARMV7, EVENT_23H) \ __PMC_EV(ARMV7, EVENT_24H) \ __PMC_EV(ARMV7, EVENT_25H) \ __PMC_EV(ARMV7, EVENT_26H) \ __PMC_EV(ARMV7, EVENT_27H) \ __PMC_EV(ARMV7, EVENT_28H) \ __PMC_EV(ARMV7, EVENT_29H) \ __PMC_EV(ARMV7, EVENT_2AH) \ __PMC_EV(ARMV7, EVENT_2BH) \ __PMC_EV(ARMV7, EVENT_2CH) \ __PMC_EV(ARMV7, EVENT_2DH) \ __PMC_EV(ARMV7, EVENT_2EH) \ __PMC_EV(ARMV7, EVENT_2FH) \ __PMC_EV(ARMV7, EVENT_30H) \ __PMC_EV(ARMV7, EVENT_31H) \ __PMC_EV(ARMV7, EVENT_32H) \ __PMC_EV(ARMV7, EVENT_33H) \ __PMC_EV(ARMV7, EVENT_34H) \ __PMC_EV(ARMV7, EVENT_35H) \ __PMC_EV(ARMV7, EVENT_36H) \ __PMC_EV(ARMV7, EVENT_37H) \ __PMC_EV(ARMV7, EVENT_38H) \ __PMC_EV(ARMV7, EVENT_39H) \ __PMC_EV(ARMV7, EVENT_3AH) \ __PMC_EV(ARMV7, EVENT_3BH) \ __PMC_EV(ARMV7, EVENT_3CH) \ __PMC_EV(ARMV7, EVENT_3DH) \ __PMC_EV(ARMV7, EVENT_3EH) \ __PMC_EV(ARMV7, EVENT_3FH) \ __PMC_EV(ARMV7, EVENT_40H) \ __PMC_EV(ARMV7, EVENT_41H) \ __PMC_EV(ARMV7, EVENT_42H) \ __PMC_EV(ARMV7, EVENT_43H) \ __PMC_EV(ARMV7, EVENT_44H) \ __PMC_EV(ARMV7, EVENT_45H) \ __PMC_EV(ARMV7, EVENT_46H) \ __PMC_EV(ARMV7, EVENT_47H) \ __PMC_EV(ARMV7, EVENT_48H) \ __PMC_EV(ARMV7, EVENT_49H) \ __PMC_EV(ARMV7, EVENT_4AH) \ __PMC_EV(ARMV7, EVENT_4BH) \ __PMC_EV(ARMV7, EVENT_4CH) \ __PMC_EV(ARMV7, EVENT_4DH) \ __PMC_EV(ARMV7, EVENT_4EH) \ __PMC_EV(ARMV7, EVENT_4FH) \ __PMC_EV(ARMV7, EVENT_50H) \ __PMC_EV(ARMV7, EVENT_51H) \ __PMC_EV(ARMV7, EVENT_52H) \ 
__PMC_EV(ARMV7, EVENT_53H) \ __PMC_EV(ARMV7, EVENT_54H) \ __PMC_EV(ARMV7, EVENT_55H) \ __PMC_EV(ARMV7, EVENT_56H) \ __PMC_EV(ARMV7, EVENT_57H) \ __PMC_EV(ARMV7, EVENT_58H) \ __PMC_EV(ARMV7, EVENT_59H) \ __PMC_EV(ARMV7, EVENT_5AH) \ __PMC_EV(ARMV7, EVENT_5BH) \ __PMC_EV(ARMV7, EVENT_5CH) \ __PMC_EV(ARMV7, EVENT_5DH) \ __PMC_EV(ARMV7, EVENT_5EH) \ __PMC_EV(ARMV7, EVENT_5FH) \ __PMC_EV(ARMV7, EVENT_60H) \ __PMC_EV(ARMV7, EVENT_61H) \ __PMC_EV(ARMV7, EVENT_62H) \ __PMC_EV(ARMV7, EVENT_63H) \ __PMC_EV(ARMV7, EVENT_64H) \ __PMC_EV(ARMV7, EVENT_65H) \ __PMC_EV(ARMV7, EVENT_66H) \ __PMC_EV(ARMV7, EVENT_67H) \ __PMC_EV(ARMV7, EVENT_68H) \ __PMC_EV(ARMV7, EVENT_69H) \ __PMC_EV(ARMV7, EVENT_6AH) \ __PMC_EV(ARMV7, EVENT_6BH) \ __PMC_EV(ARMV7, EVENT_6CH) \ __PMC_EV(ARMV7, EVENT_6DH) \ __PMC_EV(ARMV7, EVENT_6EH) \ __PMC_EV(ARMV7, EVENT_6FH) \ __PMC_EV(ARMV7, EVENT_70H) \ __PMC_EV(ARMV7, EVENT_71H) \ __PMC_EV(ARMV7, EVENT_72H) \ __PMC_EV(ARMV7, EVENT_73H) \ __PMC_EV(ARMV7, EVENT_74H) \ __PMC_EV(ARMV7, EVENT_75H) \ __PMC_EV(ARMV7, EVENT_76H) \ __PMC_EV(ARMV7, EVENT_77H) \ __PMC_EV(ARMV7, EVENT_78H) \ __PMC_EV(ARMV7, EVENT_79H) \ __PMC_EV(ARMV7, EVENT_7AH) \ __PMC_EV(ARMV7, EVENT_7BH) \ __PMC_EV(ARMV7, EVENT_7CH) \ __PMC_EV(ARMV7, EVENT_7DH) \ __PMC_EV(ARMV7, EVENT_7EH) \ __PMC_EV(ARMV7, EVENT_7FH) \ __PMC_EV(ARMV7, EVENT_80H) \ __PMC_EV(ARMV7, EVENT_81H) \ __PMC_EV(ARMV7, EVENT_82H) \ __PMC_EV(ARMV7, EVENT_83H) \ __PMC_EV(ARMV7, EVENT_84H) \ __PMC_EV(ARMV7, EVENT_85H) \ __PMC_EV(ARMV7, EVENT_86H) \ __PMC_EV(ARMV7, EVENT_87H) \ __PMC_EV(ARMV7, EVENT_88H) \ __PMC_EV(ARMV7, EVENT_89H) \ __PMC_EV(ARMV7, EVENT_8AH) \ __PMC_EV(ARMV7, EVENT_8BH) \ __PMC_EV(ARMV7, EVENT_8CH) \ __PMC_EV(ARMV7, EVENT_8DH) \ __PMC_EV(ARMV7, EVENT_8EH) \ __PMC_EV(ARMV7, EVENT_8FH) \ __PMC_EV(ARMV7, EVENT_90H) \ __PMC_EV(ARMV7, EVENT_91H) \ __PMC_EV(ARMV7, EVENT_92H) \ __PMC_EV(ARMV7, EVENT_93H) \ __PMC_EV(ARMV7, EVENT_94H) \ __PMC_EV(ARMV7, EVENT_95H) \ __PMC_EV(ARMV7, EVENT_96H) \ __PMC_EV(ARMV7, EVENT_97H) \ __PMC_EV(ARMV7, EVENT_98H) \ __PMC_EV(ARMV7, EVENT_99H) \ __PMC_EV(ARMV7, EVENT_9AH) \ __PMC_EV(ARMV7, EVENT_9BH) \ __PMC_EV(ARMV7, EVENT_9CH) \ __PMC_EV(ARMV7, EVENT_9DH) \ __PMC_EV(ARMV7, EVENT_9EH) \ __PMC_EV(ARMV7, EVENT_9FH) \ __PMC_EV(ARMV7, EVENT_A0H) \ __PMC_EV(ARMV7, EVENT_A1H) \ __PMC_EV(ARMV7, EVENT_A2H) \ __PMC_EV(ARMV7, EVENT_A3H) \ __PMC_EV(ARMV7, EVENT_A4H) \ __PMC_EV(ARMV7, EVENT_A5H) \ __PMC_EV(ARMV7, EVENT_A6H) \ __PMC_EV(ARMV7, EVENT_A7H) \ __PMC_EV(ARMV7, EVENT_A8H) \ __PMC_EV(ARMV7, EVENT_A9H) \ __PMC_EV(ARMV7, EVENT_AAH) \ __PMC_EV(ARMV7, EVENT_ABH) \ __PMC_EV(ARMV7, EVENT_ACH) \ __PMC_EV(ARMV7, EVENT_ADH) \ __PMC_EV(ARMV7, EVENT_AEH) \ __PMC_EV(ARMV7, EVENT_AFH) \ __PMC_EV(ARMV7, EVENT_B0H) \ __PMC_EV(ARMV7, EVENT_B1H) \ __PMC_EV(ARMV7, EVENT_B2H) \ __PMC_EV(ARMV7, EVENT_B3H) \ __PMC_EV(ARMV7, EVENT_B4H) \ __PMC_EV(ARMV7, EVENT_B5H) \ __PMC_EV(ARMV7, EVENT_B6H) \ __PMC_EV(ARMV7, EVENT_B7H) \ __PMC_EV(ARMV7, EVENT_B8H) \ __PMC_EV(ARMV7, EVENT_B9H) \ __PMC_EV(ARMV7, EVENT_BAH) \ __PMC_EV(ARMV7, EVENT_BBH) \ __PMC_EV(ARMV7, EVENT_BCH) \ __PMC_EV(ARMV7, EVENT_BDH) \ __PMC_EV(ARMV7, EVENT_BEH) \ __PMC_EV(ARMV7, EVENT_BFH) \ __PMC_EV(ARMV7, EVENT_C0H) \ __PMC_EV(ARMV7, EVENT_C1H) \ __PMC_EV(ARMV7, EVENT_C2H) \ __PMC_EV(ARMV7, EVENT_C3H) \ __PMC_EV(ARMV7, EVENT_C4H) \ __PMC_EV(ARMV7, EVENT_C5H) \ __PMC_EV(ARMV7, EVENT_C6H) \ __PMC_EV(ARMV7, EVENT_C7H) \ __PMC_EV(ARMV7, EVENT_C8H) \ __PMC_EV(ARMV7, EVENT_C9H) \ __PMC_EV(ARMV7, EVENT_CAH) \ __PMC_EV(ARMV7, EVENT_CBH) \ __PMC_EV(ARMV7, EVENT_CCH) \ __PMC_EV(ARMV7, 
EVENT_CDH) \ __PMC_EV(ARMV7, EVENT_CEH) \ __PMC_EV(ARMV7, EVENT_CFH) \ __PMC_EV(ARMV7, EVENT_D0H) \ __PMC_EV(ARMV7, EVENT_D1H) \ __PMC_EV(ARMV7, EVENT_D2H) \ __PMC_EV(ARMV7, EVENT_D3H) \ __PMC_EV(ARMV7, EVENT_D4H) \ __PMC_EV(ARMV7, EVENT_D5H) \ __PMC_EV(ARMV7, EVENT_D6H) \ __PMC_EV(ARMV7, EVENT_D7H) \ __PMC_EV(ARMV7, EVENT_D8H) \ __PMC_EV(ARMV7, EVENT_D9H) \ __PMC_EV(ARMV7, EVENT_DAH) \ __PMC_EV(ARMV7, EVENT_DBH) \ __PMC_EV(ARMV7, EVENT_DCH) \ __PMC_EV(ARMV7, EVENT_DDH) \ __PMC_EV(ARMV7, EVENT_DEH) \ __PMC_EV(ARMV7, EVENT_DFH) \ __PMC_EV(ARMV7, EVENT_E0H) \ __PMC_EV(ARMV7, EVENT_E1H) \ __PMC_EV(ARMV7, EVENT_E2H) \ __PMC_EV(ARMV7, EVENT_E3H) \ __PMC_EV(ARMV7, EVENT_E4H) \ __PMC_EV(ARMV7, EVENT_E5H) \ __PMC_EV(ARMV7, EVENT_E6H) \ __PMC_EV(ARMV7, EVENT_E7H) \ __PMC_EV(ARMV7, EVENT_E8H) \ __PMC_EV(ARMV7, EVENT_E9H) \ __PMC_EV(ARMV7, EVENT_EAH) \ __PMC_EV(ARMV7, EVENT_EBH) \ __PMC_EV(ARMV7, EVENT_ECH) \ __PMC_EV(ARMV7, EVENT_EDH) \ __PMC_EV(ARMV7, EVENT_EEH) \ __PMC_EV(ARMV7, EVENT_EFH) \ __PMC_EV(ARMV7, EVENT_F0H) \ __PMC_EV(ARMV7, EVENT_F1H) \ __PMC_EV(ARMV7, EVENT_F2H) \ __PMC_EV(ARMV7, EVENT_F3H) \ __PMC_EV(ARMV7, EVENT_F4H) \ __PMC_EV(ARMV7, EVENT_F5H) \ __PMC_EV(ARMV7, EVENT_F6H) \ __PMC_EV(ARMV7, EVENT_F7H) \ __PMC_EV(ARMV7, EVENT_F8H) \ __PMC_EV(ARMV7, EVENT_F9H) \ __PMC_EV(ARMV7, EVENT_FAH) \ __PMC_EV(ARMV7, EVENT_FBH) \ __PMC_EV(ARMV7, EVENT_FCH) \ __PMC_EV(ARMV7, EVENT_FDH) \ __PMC_EV(ARMV7, EVENT_FEH) \ __PMC_EV(ARMV7, EVENT_FFH) #define PMC_EV_ARMV7_FIRST PMC_EV_ARMV7_EVENT_00H #define PMC_EV_ARMV7_LAST PMC_EV_ARMV7_EVENT_FFH #define __PMC_EV_ALIAS_ARMV7_COMMON() \ __PMC_EV_ALIAS("PMNC_SW_INCR", ARMV7_EVENT_00H) \ __PMC_EV_ALIAS("L1_ICACHE_REFILL", ARMV7_EVENT_01H) \ __PMC_EV_ALIAS("ITLB_REFILL", ARMV7_EVENT_02H) \ __PMC_EV_ALIAS("L1_DCACHE_REFILL", ARMV7_EVENT_03H) \ __PMC_EV_ALIAS("L1_DCACHE_ACCESS", ARMV7_EVENT_04H) \ __PMC_EV_ALIAS("DTLB_REFILL", ARMV7_EVENT_05H) \ __PMC_EV_ALIAS("MEM_READ", ARMV7_EVENT_06H) \ __PMC_EV_ALIAS("MEM_WRITE", ARMV7_EVENT_07H) \ __PMC_EV_ALIAS("EXC_TAKEN", ARMV7_EVENT_09H) \ __PMC_EV_ALIAS("EXC_EXECUTED", ARMV7_EVENT_0AH) \ __PMC_EV_ALIAS("CID_WRITE", ARMV7_EVENT_0BH) \ __PMC_EV_ALIAS("PC_WRITE", ARMV7_EVENT_0CH) \ __PMC_EV_ALIAS("PC_IMM_BRANCH", ARMV7_EVENT_0DH) \ __PMC_EV_ALIAS("MEM_UNALIGNED_ACCESS", ARMV7_EVENT_0FH) \ __PMC_EV_ALIAS("PC_BRANCH_MIS_PRED", ARMV7_EVENT_10H) \ __PMC_EV_ALIAS("CLOCK_CYCLES", ARMV7_EVENT_11H) \ __PMC_EV_ALIAS("PC_BRANCH_PRED", ARMV7_EVENT_12H) #define __PMC_EV_ALIAS_ARMV7_COMMON_A8() \ __PMC_EV_ALIAS_ARMV7_COMMON() \ __PMC_EV_ALIAS("INSTR_EXECUTED", ARMV7_EVENT_08H) \ __PMC_EV_ALIAS("PC_PROC_RETURN", ARMV7_EVENT_0EH) \ __PMC_EV_ALIAS("MEM_ACCESS", ARMV7_EVENT_13H) \ __PMC_EV_ALIAS("L1_ICACHE_ACCESS", ARMV7_EVENT_14H) \ __PMC_EV_ALIAS("L1_DCACHE_WB", ARMV7_EVENT_15H) \ __PMC_EV_ALIAS("L2_CACHE_ACCESS", ARMV7_EVENT_16H) \ __PMC_EV_ALIAS("L2_CACHE_REFILL", ARMV7_EVENT_17H) \ __PMC_EV_ALIAS("L2_CACHE_WB", ARMV7_EVENT_18H) \ __PMC_EV_ALIAS("BUS_ACCESS", ARMV7_EVENT_19H) \ __PMC_EV_ALIAS("MEM_ERROR", ARMV7_EVENT_1AH) \ __PMC_EV_ALIAS("INSTR_SPEC", ARMV7_EVENT_1BH) \ __PMC_EV_ALIAS("TTBR_WRITE", ARMV7_EVENT_1CH) \ __PMC_EV_ALIAS("BUS_CYCLES", ARMV7_EVENT_1DH) \ __PMC_EV_ALIAS("CPU_CYCLES", ARMV7_EVENT_FFH) #define __PMC_EV_ALIAS_ARMV7_CORTEX_A8() \ __PMC_EV_ALIAS_ARMV7_COMMON_A8() \ __PMC_EV_ALIAS("WRITE_BUF_FULL", ARMV7_EVENT_40H) \ __PMC_EV_ALIAS("L2_STORE_MERGED", ARMV7_EVENT_41H) \ __PMC_EV_ALIAS("L2_STORE_BUFFERABLE", ARMV7_EVENT_42H) \ __PMC_EV_ALIAS("L2_ACCESS", ARMV7_EVENT_43H) \ __PMC_EV_ALIAS("L2_CACHE_MISS", 
ARMV7_EVENT_44H) \ __PMC_EV_ALIAS("AXI_READ", ARMV7_EVENT_45H) \ __PMC_EV_ALIAS("AXI_WRITE", ARMV7_EVENT_46H) \ __PMC_EV_ALIAS("MEM_REPLAY_EVT", ARMV7_EVENT_47H) \ __PMC_EV_ALIAS("MEM_UNALIGNED_ACCESS_REPLAY", ARMV7_EVENT_48H) \ __PMC_EV_ALIAS("L1_DCACHE_HASH_MISS", ARMV7_EVENT_49H) \ __PMC_EV_ALIAS("L1_ICACHE_HASH_MISS", ARMV7_EVENT_4AH) \ __PMC_EV_ALIAS("L1_CACHE_PAGECOL_ALIAS", ARMV7_EVENT_4BH) \ __PMC_EV_ALIAS("L1_DCACHE_NEON_ACCESS", ARMV7_EVENT_4CH) \ __PMC_EV_ALIAS("L1_DCACHE_NEON_CACHEABLE", ARMV7_EVENT_4DH) \ __PMC_EV_ALIAS("L2_CACHE_NEON_MEM_ACCESS", ARMV7_EVENT_4EH) \ __PMC_EV_ALIAS("L2_CACHE_NEON_HIT", ARMV7_EVENT_4FH) \ __PMC_EV_ALIAS("L1_CACHE_ACCESS_NOCP15", ARMV7_EVENT_50H) \ __PMC_EV_ALIAS("RET_STACK_MISPREDICT", ARMV7_EVENT_51H) \ __PMC_EV_ALIAS("BRANCH_DIR_MISPREDICT", ARMV7_EVENT_52H) \ __PMC_EV_ALIAS("PRED_BRANCH_PRED_TAKEN", ARMV7_EVENT_53H) \ __PMC_EV_ALIAS("PRED_BRANCH_EXEC_TAKEN", ARMV7_EVENT_54H) \ __PMC_EV_ALIAS("OPS_ISSUED", ARMV7_EVENT_55H) \ __PMC_EV_ALIAS("CYCLES_NO_INSTRUCTION", ARMV7_EVENT_56H) \ __PMC_EV_ALIAS("INSTRUCTIONS_ISSUED_CYCLE", ARMV7_EVENT_57H) \ __PMC_EV_ALIAS("CYCLES_STALLED_NEON_MRC", ARMV7_EVENT_58H) \ __PMC_EV_ALIAS("CYCLES_STALLED_NEON_FULLQ", ARMV7_EVENT_59H) \ __PMC_EV_ALIAS("CYCLES_NONIDLE_NEON_INT", ARMV7_EVENT_5AH) \ __PMC_EV_ALIAS("PMUEXTIN0_EVT", ARMV7_EVENT_70H) \ __PMC_EV_ALIAS("PMUEXTIN1_EVT", ARMV7_EVENT_71H) \ __PMC_EV_ALIAS("PMUEXTIN_EVT", ARMV7_EVENT_72H) #define PMC_EV_ARMV7_CORTEX_A8_FIRST PMC_EV_ARMV7_PMNC_SW_INCR #define PMC_EV_ARMV7_CORTEX_A8_LAST PMC_EV_ARMV7_PMUEXTIN_EVT #define __PMC_EV_ALIAS_ARMV7_CORTEX_A9() \ __PMC_EV_ALIAS_ARMV7_COMMON() \ __PMC_EV_ALIAS("JAVA_BYTECODE", ARMV7_EVENT_40H) \ __PMC_EV_ALIAS("SOFTWARE_JAVA_BYTECODE", ARMV7_EVENT_41H) \ __PMC_EV_ALIAS("JAZELLE_BACKWARD_BRANCH", ARMV7_EVENT_42H) \ __PMC_EV_ALIAS("COHERENT_LINEFILL_MISSC", ARMV7_EVENT_50H) \ __PMC_EV_ALIAS("COHERENT_LINEFILL_HITC", ARMV7_EVENT_51H) \ __PMC_EV_ALIAS("INSTR_CACHE_DEPENDENT_STALL", ARMV7_EVENT_60H) \ __PMC_EV_ALIAS("DATA_CACHE_DEPENDENT_STALL", ARMV7_EVENT_61H) \ __PMC_EV_ALIAS("MAIN_TLB_MISS_STALL", ARMV7_EVENT_62H) \ __PMC_EV_ALIAS("STREX_PASSED", ARMV7_EVENT_63H) \ __PMC_EV_ALIAS("STREX_FAILED", ARMV7_EVENT_64H) \ __PMC_EV_ALIAS("DATA_EVICTION", ARMV7_EVENT_65H) \ __PMC_EV_ALIAS("ISSUE_DNOT_DISPATCH_ANY_INSTR", ARMV7_EVENT_66H) \ __PMC_EV_ALIAS("ISSUE_IS_EMPTY", ARMV7_EVENT_67H) \ __PMC_EV_ALIAS("INSTR_RENAMED", ARMV7_EVENT_68H) \ __PMC_EV_ALIAS("PREDICTABLE_FUNCTION_RETURN", ARMV7_EVENT_6EH) \ __PMC_EV_ALIAS("MAIN_EXECUTION_UNIT_PIPE", ARMV7_EVENT_70H) \ __PMC_EV_ALIAS("SECOND_EXECUTION_UNIT_PIPE", ARMV7_EVENT_71H) \ __PMC_EV_ALIAS("LOAD_STORE_PIPE", ARMV7_EVENT_72H) \ __PMC_EV_ALIAS("FLOATING_POINT_INSTR_RENAMED", ARMV7_EVENT_73H) \ __PMC_EV_ALIAS("NEON_INSTRS_RENAMED", ARMV7_EVENT_74H) \ __PMC_EV_ALIAS("PLD_STALL", ARMV7_EVENT_80H) \ __PMC_EV_ALIAS("WRITE_STALL", ARMV7_EVENT_81H) \ __PMC_EV_ALIAS("INSTR_MAIN_TLB_MISS_STALL", ARMV7_EVENT_82H) \ __PMC_EV_ALIAS("DATA_MAIN_TLB_MISS_STALL", ARMV7_EVENT_83H) \ __PMC_EV_ALIAS("INSTR_MICRO_TLB_MISS_STALL", ARMV7_EVENT_84H) \ __PMC_EV_ALIAS("DATA_MICRO_TLB_MISS_STALL", ARMV7_EVENT_85H) \ __PMC_EV_ALIAS("DMB_STALL", ARMV7_EVENT_86H) \ __PMC_EV_ALIAS("INTEGER_CORE_CLOCK_ENABLED", ARMV7_EVENT_8AH) \ __PMC_EV_ALIAS("DATA_ENGINE_CLOCK_ENABLED", ARMV7_EVENT_8BH) \ __PMC_EV_ALIAS("ISB", ARMV7_EVENT_90H) \ __PMC_EV_ALIAS("DSB", ARMV7_EVENT_91H) \ __PMC_EV_ALIAS("DMB", ARMV7_EVENT_92H) \ __PMC_EV_ALIAS("EXTERNAL_INTERRUPT", ARMV7_EVENT_93H) \ 
__PMC_EV_ALIAS("PLE_CACHE_LINE_REQ_COMPLETED", ARMV7_EVENT_A0H) \ __PMC_EV_ALIAS("PLE_CACHE_LINE_REQ_SKIPPED", ARMV7_EVENT_A1H) \ __PMC_EV_ALIAS("PLE_FIFO_FLUSH", ARMV7_EVENT_A2H) \ __PMC_EV_ALIAS("PLE_REQUEST_COMPLETED", ARMV7_EVENT_A3H) \ __PMC_EV_ALIAS("PLE_FIFO_OVERFLOW", ARMV7_EVENT_A4H) \ __PMC_EV_ALIAS("PLE_REQUEST_PROGRAMMED", ARMV7_EVENT_A5H) /* * ARMv8 Events */ #define __PMC_EV_ARMV8() \ __PMC_EV(ARMV8, EVENT_00H) \ __PMC_EV(ARMV8, EVENT_01H) \ __PMC_EV(ARMV8, EVENT_02H) \ __PMC_EV(ARMV8, EVENT_03H) \ __PMC_EV(ARMV8, EVENT_04H) \ __PMC_EV(ARMV8, EVENT_05H) \ __PMC_EV(ARMV8, EVENT_06H) \ __PMC_EV(ARMV8, EVENT_07H) \ __PMC_EV(ARMV8, EVENT_08H) \ __PMC_EV(ARMV8, EVENT_09H) \ __PMC_EV(ARMV8, EVENT_0AH) \ __PMC_EV(ARMV8, EVENT_0BH) \ __PMC_EV(ARMV8, EVENT_0CH) \ __PMC_EV(ARMV8, EVENT_0DH) \ __PMC_EV(ARMV8, EVENT_0EH) \ __PMC_EV(ARMV8, EVENT_0FH) \ __PMC_EV(ARMV8, EVENT_10H) \ __PMC_EV(ARMV8, EVENT_11H) \ __PMC_EV(ARMV8, EVENT_12H) \ __PMC_EV(ARMV8, EVENT_13H) \ __PMC_EV(ARMV8, EVENT_14H) \ __PMC_EV(ARMV8, EVENT_15H) \ __PMC_EV(ARMV8, EVENT_16H) \ __PMC_EV(ARMV8, EVENT_17H) \ __PMC_EV(ARMV8, EVENT_18H) \ __PMC_EV(ARMV8, EVENT_19H) \ __PMC_EV(ARMV8, EVENT_1AH) \ __PMC_EV(ARMV8, EVENT_1BH) \ __PMC_EV(ARMV8, EVENT_1CH) \ __PMC_EV(ARMV8, EVENT_1DH) \ __PMC_EV(ARMV8, EVENT_1EH) \ __PMC_EV(ARMV8, EVENT_1FH) \ __PMC_EV(ARMV8, EVENT_20H) \ __PMC_EV(ARMV8, EVENT_21H) \ __PMC_EV(ARMV8, EVENT_22H) \ __PMC_EV(ARMV8, EVENT_23H) \ __PMC_EV(ARMV8, EVENT_24H) \ __PMC_EV(ARMV8, EVENT_25H) \ __PMC_EV(ARMV8, EVENT_26H) \ __PMC_EV(ARMV8, EVENT_27H) \ __PMC_EV(ARMV8, EVENT_28H) \ __PMC_EV(ARMV8, EVENT_29H) \ __PMC_EV(ARMV8, EVENT_2AH) \ __PMC_EV(ARMV8, EVENT_2BH) \ __PMC_EV(ARMV8, EVENT_2CH) \ __PMC_EV(ARMV8, EVENT_2DH) \ __PMC_EV(ARMV8, EVENT_2EH) \ __PMC_EV(ARMV8, EVENT_2FH) \ __PMC_EV(ARMV8, EVENT_30H) \ __PMC_EV(ARMV8, EVENT_31H) \ __PMC_EV(ARMV8, EVENT_32H) \ __PMC_EV(ARMV8, EVENT_33H) \ __PMC_EV(ARMV8, EVENT_34H) \ __PMC_EV(ARMV8, EVENT_35H) \ __PMC_EV(ARMV8, EVENT_36H) \ __PMC_EV(ARMV8, EVENT_37H) \ __PMC_EV(ARMV8, EVENT_38H) \ __PMC_EV(ARMV8, EVENT_39H) \ __PMC_EV(ARMV8, EVENT_3AH) \ __PMC_EV(ARMV8, EVENT_3BH) \ __PMC_EV(ARMV8, EVENT_3CH) \ __PMC_EV(ARMV8, EVENT_3DH) \ __PMC_EV(ARMV8, EVENT_3EH) \ __PMC_EV(ARMV8, EVENT_3FH) \ __PMC_EV(ARMV8, EVENT_40H) \ __PMC_EV(ARMV8, EVENT_41H) \ __PMC_EV(ARMV8, EVENT_42H) \ __PMC_EV(ARMV8, EVENT_43H) \ __PMC_EV(ARMV8, EVENT_44H) \ __PMC_EV(ARMV8, EVENT_45H) \ __PMC_EV(ARMV8, EVENT_46H) \ __PMC_EV(ARMV8, EVENT_47H) \ __PMC_EV(ARMV8, EVENT_48H) \ __PMC_EV(ARMV8, EVENT_49H) \ __PMC_EV(ARMV8, EVENT_4AH) \ __PMC_EV(ARMV8, EVENT_4BH) \ __PMC_EV(ARMV8, EVENT_4CH) \ __PMC_EV(ARMV8, EVENT_4DH) \ __PMC_EV(ARMV8, EVENT_4EH) \ __PMC_EV(ARMV8, EVENT_4FH) \ __PMC_EV(ARMV8, EVENT_50H) \ __PMC_EV(ARMV8, EVENT_51H) \ __PMC_EV(ARMV8, EVENT_52H) \ __PMC_EV(ARMV8, EVENT_53H) \ __PMC_EV(ARMV8, EVENT_54H) \ __PMC_EV(ARMV8, EVENT_55H) \ __PMC_EV(ARMV8, EVENT_56H) \ __PMC_EV(ARMV8, EVENT_57H) \ __PMC_EV(ARMV8, EVENT_58H) \ __PMC_EV(ARMV8, EVENT_59H) \ __PMC_EV(ARMV8, EVENT_5AH) \ __PMC_EV(ARMV8, EVENT_5BH) \ __PMC_EV(ARMV8, EVENT_5CH) \ __PMC_EV(ARMV8, EVENT_5DH) \ __PMC_EV(ARMV8, EVENT_5EH) \ __PMC_EV(ARMV8, EVENT_5FH) \ __PMC_EV(ARMV8, EVENT_60H) \ __PMC_EV(ARMV8, EVENT_61H) \ __PMC_EV(ARMV8, EVENT_62H) \ __PMC_EV(ARMV8, EVENT_63H) \ __PMC_EV(ARMV8, EVENT_64H) \ __PMC_EV(ARMV8, EVENT_65H) \ __PMC_EV(ARMV8, EVENT_66H) \ __PMC_EV(ARMV8, EVENT_67H) \ __PMC_EV(ARMV8, EVENT_68H) \ __PMC_EV(ARMV8, EVENT_69H) \ __PMC_EV(ARMV8, EVENT_6AH) \ __PMC_EV(ARMV8, EVENT_6BH) \ __PMC_EV(ARMV8, 
EVENT_6CH) \ __PMC_EV(ARMV8, EVENT_6DH) \ __PMC_EV(ARMV8, EVENT_6EH) \ __PMC_EV(ARMV8, EVENT_6FH) \ __PMC_EV(ARMV8, EVENT_70H) \ __PMC_EV(ARMV8, EVENT_71H) \ __PMC_EV(ARMV8, EVENT_72H) \ __PMC_EV(ARMV8, EVENT_73H) \ __PMC_EV(ARMV8, EVENT_74H) \ __PMC_EV(ARMV8, EVENT_75H) \ __PMC_EV(ARMV8, EVENT_76H) \ __PMC_EV(ARMV8, EVENT_77H) \ __PMC_EV(ARMV8, EVENT_78H) \ __PMC_EV(ARMV8, EVENT_79H) \ __PMC_EV(ARMV8, EVENT_7AH) \ __PMC_EV(ARMV8, EVENT_7BH) \ __PMC_EV(ARMV8, EVENT_7CH) \ __PMC_EV(ARMV8, EVENT_7DH) \ __PMC_EV(ARMV8, EVENT_7EH) \ __PMC_EV(ARMV8, EVENT_7FH) \ __PMC_EV(ARMV8, EVENT_80H) \ __PMC_EV(ARMV8, EVENT_81H) \ __PMC_EV(ARMV8, EVENT_82H) \ __PMC_EV(ARMV8, EVENT_83H) \ __PMC_EV(ARMV8, EVENT_84H) \ __PMC_EV(ARMV8, EVENT_85H) \ __PMC_EV(ARMV8, EVENT_86H) \ __PMC_EV(ARMV8, EVENT_87H) \ __PMC_EV(ARMV8, EVENT_88H) \ __PMC_EV(ARMV8, EVENT_89H) \ __PMC_EV(ARMV8, EVENT_8AH) \ __PMC_EV(ARMV8, EVENT_8BH) \ __PMC_EV(ARMV8, EVENT_8CH) \ __PMC_EV(ARMV8, EVENT_8DH) \ __PMC_EV(ARMV8, EVENT_8EH) \ __PMC_EV(ARMV8, EVENT_8FH) \ __PMC_EV(ARMV8, EVENT_90H) \ __PMC_EV(ARMV8, EVENT_91H) \ __PMC_EV(ARMV8, EVENT_92H) \ __PMC_EV(ARMV8, EVENT_93H) \ __PMC_EV(ARMV8, EVENT_94H) \ __PMC_EV(ARMV8, EVENT_95H) \ __PMC_EV(ARMV8, EVENT_96H) \ __PMC_EV(ARMV8, EVENT_97H) \ __PMC_EV(ARMV8, EVENT_98H) \ __PMC_EV(ARMV8, EVENT_99H) \ __PMC_EV(ARMV8, EVENT_9AH) \ __PMC_EV(ARMV8, EVENT_9BH) \ __PMC_EV(ARMV8, EVENT_9CH) \ __PMC_EV(ARMV8, EVENT_9DH) \ __PMC_EV(ARMV8, EVENT_9EH) \ __PMC_EV(ARMV8, EVENT_9FH) \ __PMC_EV(ARMV8, EVENT_A0H) \ __PMC_EV(ARMV8, EVENT_A1H) \ __PMC_EV(ARMV8, EVENT_A2H) \ __PMC_EV(ARMV8, EVENT_A3H) \ __PMC_EV(ARMV8, EVENT_A4H) \ __PMC_EV(ARMV8, EVENT_A5H) \ __PMC_EV(ARMV8, EVENT_A6H) \ __PMC_EV(ARMV8, EVENT_A7H) \ __PMC_EV(ARMV8, EVENT_A8H) \ __PMC_EV(ARMV8, EVENT_A9H) \ __PMC_EV(ARMV8, EVENT_AAH) \ __PMC_EV(ARMV8, EVENT_ABH) \ __PMC_EV(ARMV8, EVENT_ACH) \ __PMC_EV(ARMV8, EVENT_ADH) \ __PMC_EV(ARMV8, EVENT_AEH) \ __PMC_EV(ARMV8, EVENT_AFH) \ __PMC_EV(ARMV8, EVENT_B0H) \ __PMC_EV(ARMV8, EVENT_B1H) \ __PMC_EV(ARMV8, EVENT_B2H) \ __PMC_EV(ARMV8, EVENT_B3H) \ __PMC_EV(ARMV8, EVENT_B4H) \ __PMC_EV(ARMV8, EVENT_B5H) \ __PMC_EV(ARMV8, EVENT_B6H) \ __PMC_EV(ARMV8, EVENT_B7H) \ __PMC_EV(ARMV8, EVENT_B8H) \ __PMC_EV(ARMV8, EVENT_B9H) \ __PMC_EV(ARMV8, EVENT_BAH) \ __PMC_EV(ARMV8, EVENT_BBH) \ __PMC_EV(ARMV8, EVENT_BCH) \ __PMC_EV(ARMV8, EVENT_BDH) \ __PMC_EV(ARMV8, EVENT_BEH) \ __PMC_EV(ARMV8, EVENT_BFH) \ __PMC_EV(ARMV8, EVENT_C0H) \ __PMC_EV(ARMV8, EVENT_C1H) \ __PMC_EV(ARMV8, EVENT_C2H) \ __PMC_EV(ARMV8, EVENT_C3H) \ __PMC_EV(ARMV8, EVENT_C4H) \ __PMC_EV(ARMV8, EVENT_C5H) \ __PMC_EV(ARMV8, EVENT_C6H) \ __PMC_EV(ARMV8, EVENT_C7H) \ __PMC_EV(ARMV8, EVENT_C8H) \ __PMC_EV(ARMV8, EVENT_C9H) \ __PMC_EV(ARMV8, EVENT_CAH) \ __PMC_EV(ARMV8, EVENT_CBH) \ __PMC_EV(ARMV8, EVENT_CCH) \ __PMC_EV(ARMV8, EVENT_CDH) \ __PMC_EV(ARMV8, EVENT_CEH) \ __PMC_EV(ARMV8, EVENT_CFH) \ __PMC_EV(ARMV8, EVENT_D0H) \ __PMC_EV(ARMV8, EVENT_D1H) \ __PMC_EV(ARMV8, EVENT_D2H) \ __PMC_EV(ARMV8, EVENT_D3H) \ __PMC_EV(ARMV8, EVENT_D4H) \ __PMC_EV(ARMV8, EVENT_D5H) \ __PMC_EV(ARMV8, EVENT_D6H) \ __PMC_EV(ARMV8, EVENT_D7H) \ __PMC_EV(ARMV8, EVENT_D8H) \ __PMC_EV(ARMV8, EVENT_D9H) \ __PMC_EV(ARMV8, EVENT_DAH) \ __PMC_EV(ARMV8, EVENT_DBH) \ __PMC_EV(ARMV8, EVENT_DCH) \ __PMC_EV(ARMV8, EVENT_DDH) \ __PMC_EV(ARMV8, EVENT_DEH) \ __PMC_EV(ARMV8, EVENT_DFH) \ __PMC_EV(ARMV8, EVENT_E0H) \ __PMC_EV(ARMV8, EVENT_E1H) \ __PMC_EV(ARMV8, EVENT_E2H) \ __PMC_EV(ARMV8, EVENT_E3H) \ __PMC_EV(ARMV8, EVENT_E4H) \ __PMC_EV(ARMV8, EVENT_E5H) \ __PMC_EV(ARMV8, EVENT_E6H) \ 
__PMC_EV(ARMV8, EVENT_E7H) \ __PMC_EV(ARMV8, EVENT_E8H) \ __PMC_EV(ARMV8, EVENT_E9H) \ __PMC_EV(ARMV8, EVENT_EAH) \ __PMC_EV(ARMV8, EVENT_EBH) \ __PMC_EV(ARMV8, EVENT_ECH) \ __PMC_EV(ARMV8, EVENT_EDH) \ __PMC_EV(ARMV8, EVENT_EEH) \ __PMC_EV(ARMV8, EVENT_EFH) \ __PMC_EV(ARMV8, EVENT_F0H) \ __PMC_EV(ARMV8, EVENT_F1H) \ __PMC_EV(ARMV8, EVENT_F2H) \ __PMC_EV(ARMV8, EVENT_F3H) \ __PMC_EV(ARMV8, EVENT_F4H) \ __PMC_EV(ARMV8, EVENT_F5H) \ __PMC_EV(ARMV8, EVENT_F6H) \ __PMC_EV(ARMV8, EVENT_F7H) \ __PMC_EV(ARMV8, EVENT_F8H) \ __PMC_EV(ARMV8, EVENT_F9H) \ __PMC_EV(ARMV8, EVENT_FAH) \ __PMC_EV(ARMV8, EVENT_FBH) \ __PMC_EV(ARMV8, EVENT_FCH) \ __PMC_EV(ARMV8, EVENT_FDH) \ __PMC_EV(ARMV8, EVENT_FEH) \ __PMC_EV(ARMV8, EVENT_FFH) #define PMC_EV_ARMV8_FIRST PMC_EV_ARMV8_EVENT_00H #define PMC_EV_ARMV8_LAST PMC_EV_ARMV8_EVENT_FFH #define __PMC_EV_ALIAS_ARMV8_COMMON() \ __PMC_EV_ALIAS("SW_INCR", ARMV8_EVENT_00H) \ __PMC_EV_ALIAS("L1I_CACHE_REFILL", ARMV8_EVENT_01H) \ __PMC_EV_ALIAS("L1I_TLB_REFILL", ARMV8_EVENT_02H) \ __PMC_EV_ALIAS("L1D_CACHE_REFILL", ARMV8_EVENT_03H) \ __PMC_EV_ALIAS("L1D_CACHE", ARMV8_EVENT_04H) \ __PMC_EV_ALIAS("L1D_TLB_REFILL", ARMV8_EVENT_05H) \ __PMC_EV_ALIAS("INST_RETIRED", ARMV8_EVENT_08H) \ __PMC_EV_ALIAS("EXC_TAKEN", ARMV8_EVENT_09H) \ __PMC_EV_ALIAS("EXC_RETURN", ARMV8_EVENT_0AH) \ __PMC_EV_ALIAS("CID_WRITE_RETIRED", ARMV8_EVENT_0BH) \ __PMC_EV_ALIAS("BR_MIS_PRED", ARMV8_EVENT_10H) \ __PMC_EV_ALIAS("CPU_CYCLES", ARMV8_EVENT_11H) \ __PMC_EV_ALIAS("BR_PRED", ARMV8_EVENT_12H) \ __PMC_EV_ALIAS("MEM_ACCESS", ARMV8_EVENT_13H) \ __PMC_EV_ALIAS("L1I_CACHE", ARMV8_EVENT_14H) \ __PMC_EV_ALIAS("L1D_CACHE_WB", ARMV8_EVENT_15H) \ __PMC_EV_ALIAS("L2D_CACHE", ARMV8_EVENT_16H) \ __PMC_EV_ALIAS("L2D_CACHE_REFILL", ARMV8_EVENT_17H) \ __PMC_EV_ALIAS("L2D_CACHE_WB", ARMV8_EVENT_18H) \ __PMC_EV_ALIAS("BUS_ACCESS", ARMV8_EVENT_19H) \ __PMC_EV_ALIAS("MEMORY_ERROR", ARMV8_EVENT_1AH) \ __PMC_EV_ALIAS("BUS_CYCLES", ARMV8_EVENT_1DH) \ __PMC_EV_ALIAS("CHAIN", ARMV8_EVENT_1EH) \ __PMC_EV_ALIAS("BUS_ACCESS_LD", ARMV8_EVENT_60H) \ __PMC_EV_ALIAS("BUS_ACCESS_ST", ARMV8_EVENT_61H) \ __PMC_EV_ALIAS("BR_INDIRECT_SPEC", ARMV8_EVENT_7AH) \ __PMC_EV_ALIAS("EXC_IRQ", ARMV8_EVENT_86H) \ __PMC_EV_ALIAS("EXC_FIQ", ARMV8_EVENT_87H) #define __PMC_EV_ALIAS_ARMV8_CORTEX_A53() \ __PMC_EV_ALIAS_ARMV8_COMMON() \ __PMC_EV_ALIAS("LD_RETIRED", ARMV8_EVENT_06H) \ __PMC_EV_ALIAS("ST_RETIRED", ARMV8_EVENT_07H) \ __PMC_EV_ALIAS("PC_WRITE_RETIRED", ARMV8_EVENT_0CH) \ __PMC_EV_ALIAS("BR_IMMED_RETIRED", ARMV8_EVENT_0DH) \ __PMC_EV_ALIAS("BR_RETURN_RETIRED", ARMV8_EVENT_0EH) \ __PMC_EV_ALIAS("UNALIGNED_LDST_RETIRED",ARMV8_EVENT_0FH) #define __PMC_EV_ALIAS_ARMV8_CORTEX_A57() \ __PMC_EV_ALIAS_ARMV8_COMMON() \ __PMC_EV_ALIAS("INST_SPEC", ARMV8_EVENT_1BH) \ __PMC_EV_ALIAS("TTBR_WRITE_RETIRED", ARMV8_EVENT_1CH) \ __PMC_EV_ALIAS("L1D_CACHE_LD", ARMV8_EVENT_40H) \ __PMC_EV_ALIAS("L1D_CACHE_ST", ARMV8_EVENT_41H) \ __PMC_EV_ALIAS("L1D_CACHE_REFILL_LD", ARMV8_EVENT_42H) \ __PMC_EV_ALIAS("L1D_CACHE_REFILL_ST", ARMV8_EVENT_43H) \ __PMC_EV_ALIAS("L1D_CACHE_WB_VICTIM", ARMV8_EVENT_46H) \ __PMC_EV_ALIAS("L1D_CACHE_WB_CLEAN", ARMV8_EVENT_47H) \ __PMC_EV_ALIAS("L1D_CACHE_INVAL", ARMV8_EVENT_48H) \ __PMC_EV_ALIAS("L1D_TLB_REFILL_LD", ARMV8_EVENT_4CH) \ __PMC_EV_ALIAS("L1D_TLB_REFILL_ST", ARMV8_EVENT_4DH) \ __PMC_EV_ALIAS("L2D_CACHE_LD", ARMV8_EVENT_50H) \ __PMC_EV_ALIAS("L2D_CACHE_ST", ARMV8_EVENT_51H) \ __PMC_EV_ALIAS("L2D_CACHE_REFILL_LD", ARMV8_EVENT_52H) \ __PMC_EV_ALIAS("L2D_CACHE_REFILL_ST", ARMV8_EVENT_53H) \ __PMC_EV_ALIAS("L2D_CACHE_WB_VICTIM", 
ARMV8_EVENT_56H) \ __PMC_EV_ALIAS("L2D_CACHE_WB_CLEAN", ARMV8_EVENT_57H) \ __PMC_EV_ALIAS("L2D_CACHE_INVAL", ARMV8_EVENT_58H) \ __PMC_EV_ALIAS("BUS_ACCESS_SHARED", ARMV8_EVENT_62H) \ __PMC_EV_ALIAS("BUS_ACCESS_NOT_SHARED", ARMV8_EVENT_63H) \ __PMC_EV_ALIAS("BUS_ACCESS_NORMAL", ARMV8_EVENT_64H) \ __PMC_EV_ALIAS("BUS_ACCESS_PERIPH", ARMV8_EVENT_65H) \ __PMC_EV_ALIAS("MEM_ACCESS_LD", ARMV8_EVENT_66H) \ __PMC_EV_ALIAS("MEM_ACCESS_ST", ARMV8_EVENT_67H) \ __PMC_EV_ALIAS("UNALIGNED_LD_SPEC", ARMV8_EVENT_68H) \ __PMC_EV_ALIAS("UNALIGNED_ST_SPEC", ARMV8_EVENT_69H) \ __PMC_EV_ALIAS("UNALIGNED_LDST_SPEC", ARMV8_EVENT_6AH) \ __PMC_EV_ALIAS("LDREX_SPEC", ARMV8_EVENT_6CH) \ __PMC_EV_ALIAS("STREX_PASS_SPEC", ARMV8_EVENT_6DH) \ __PMC_EV_ALIAS("STREX_FAIL_SPEC", ARMV8_EVENT_6EH) \ __PMC_EV_ALIAS("LD_SPEC", ARMV8_EVENT_70H) \ __PMC_EV_ALIAS("ST_SPEC", ARMV8_EVENT_71H) \ __PMC_EV_ALIAS("LDST_SPEC", ARMV8_EVENT_72H) \ __PMC_EV_ALIAS("DP_SPEC", ARMV8_EVENT_73H) \ __PMC_EV_ALIAS("ASE_SPEC", ARMV8_EVENT_74H) \ __PMC_EV_ALIAS("VFP_SPEC", ARMV8_EVENT_75H) \ __PMC_EV_ALIAS("PC_WRITE_SPEC", ARMV8_EVENT_76H) \ __PMC_EV_ALIAS("CRYPTO_SPEC", ARMV8_EVENT_77H) \ __PMC_EV_ALIAS("BR_IMMED_SPEC", ARMV8_EVENT_78H) \ __PMC_EV_ALIAS("BR_RETURN_SPEC", ARMV8_EVENT_79H) \ __PMC_EV_ALIAS("ISB_SPEC", ARMV8_EVENT_7CH) \ __PMC_EV_ALIAS("DSB_SPEC", ARMV8_EVENT_7DH) \ __PMC_EV_ALIAS("DMB_SPEC", ARMV8_EVENT_7EH) \ __PMC_EV_ALIAS("EXC_UNDEF", ARMV8_EVENT_81H) \ __PMC_EV_ALIAS("EXC_SVC", ARMV8_EVENT_82H) \ __PMC_EV_ALIAS("EXC_PABORT", ARMV8_EVENT_83H) \ __PMC_EV_ALIAS("EXC_DABORT", ARMV8_EVENT_84H) \ __PMC_EV_ALIAS("EXC_SMC", ARMV8_EVENT_88H) \ __PMC_EV_ALIAS("EXC_HVC", ARMV8_EVENT_8AH) \ __PMC_EV_ALIAS("EXC_TRAP_PABORT", ARMV8_EVENT_8BH) \ __PMC_EV_ALIAS("EXC_TRAP_DABORT", ARMV8_EVENT_8CH) \ __PMC_EV_ALIAS("EXC_TRAP_OTHER", ARMV8_EVENT_8DH) \ __PMC_EV_ALIAS("EXC_TRAP_IRQ", ARMV8_EVENT_8EH) \ __PMC_EV_ALIAS("EXC_TRAP_FIQ", ARMV8_EVENT_8FH) \ __PMC_EV_ALIAS("RC_LD_SPEC", ARMV8_EVENT_90H) \ __PMC_EV_ALIAS("RC_ST_SPEC", ARMV8_EVENT_91H) /* * MIPS Events from "Programming the MIPS32 24K Core Family", * Document Number: MD00355 Revision 04.63 December 19, 2008 * These events are kept in the order found in Table 7.4. * For counters which are different between the left hand * column (0/2) and the right hand column (1/3) the left * hand is given first, e.g. BRANCH_COMPLETED and BRANCH_MISPRED * in the definition below. 
*/ #define __PMC_EV_MIPS24K() \ __PMC_EV(MIPS24K, CYCLE) \ __PMC_EV(MIPS24K, INSTR_EXECUTED) \ __PMC_EV(MIPS24K, BRANCH_COMPLETED) \ __PMC_EV(MIPS24K, BRANCH_MISPRED) \ __PMC_EV(MIPS24K, RETURN) \ __PMC_EV(MIPS24K, RETURN_MISPRED) \ __PMC_EV(MIPS24K, RETURN_NOT_31) \ __PMC_EV(MIPS24K, RETURN_NOTPRED) \ __PMC_EV(MIPS24K, ITLB_ACCESS) \ __PMC_EV(MIPS24K, ITLB_MISS) \ __PMC_EV(MIPS24K, DTLB_ACCESS) \ __PMC_EV(MIPS24K, DTLB_MISS) \ __PMC_EV(MIPS24K, JTLB_IACCESS) \ __PMC_EV(MIPS24K, JTLB_IMISS) \ __PMC_EV(MIPS24K, JTLB_DACCESS) \ __PMC_EV(MIPS24K, JTLB_DMISS) \ __PMC_EV(MIPS24K, IC_FETCH) \ __PMC_EV(MIPS24K, IC_MISS) \ __PMC_EV(MIPS24K, DC_LOADSTORE) \ __PMC_EV(MIPS24K, DC_WRITEBACK) \ __PMC_EV(MIPS24K, DC_MISS) \ __PMC_EV(MIPS24K, STORE_MISS) \ __PMC_EV(MIPS24K, LOAD_MISS) \ __PMC_EV(MIPS24K, INTEGER_COMPLETED) \ __PMC_EV(MIPS24K, FP_COMPLETED) \ __PMC_EV(MIPS24K, LOAD_COMPLETED) \ __PMC_EV(MIPS24K, STORE_COMPLETED) \ __PMC_EV(MIPS24K, BARRIER_COMPLETED) \ __PMC_EV(MIPS24K, MIPS16_COMPLETED) \ __PMC_EV(MIPS24K, NOP_COMPLETED) \ __PMC_EV(MIPS24K, INTEGER_MULDIV_COMPLETED)\ __PMC_EV(MIPS24K, RF_STALL) \ __PMC_EV(MIPS24K, INSTR_REFETCH) \ __PMC_EV(MIPS24K, STORE_COND_COMPLETED) \ __PMC_EV(MIPS24K, STORE_COND_FAILED) \ __PMC_EV(MIPS24K, ICACHE_REQUESTS) \ __PMC_EV(MIPS24K, ICACHE_HIT) \ __PMC_EV(MIPS24K, L2_WRITEBACK) \ __PMC_EV(MIPS24K, L2_ACCESS) \ __PMC_EV(MIPS24K, L2_MISS) \ __PMC_EV(MIPS24K, L2_ERR_CORRECTED) \ __PMC_EV(MIPS24K, EXCEPTIONS) \ __PMC_EV(MIPS24K, RF_CYCLES_STALLED) \ __PMC_EV(MIPS24K, IFU_CYCLES_STALLED) \ __PMC_EV(MIPS24K, ALU_CYCLES_STALLED) \ __PMC_EV(MIPS24K, UNCACHED_LOAD) \ __PMC_EV(MIPS24K, UNCACHED_STORE) \ __PMC_EV(MIPS24K, CP2_REG_TO_REG_COMPLETED)\ __PMC_EV(MIPS24K, MFTC_COMPLETED) \ __PMC_EV(MIPS24K, IC_BLOCKED_CYCLES) \ __PMC_EV(MIPS24K, DC_BLOCKED_CYCLES) \ __PMC_EV(MIPS24K, L2_IMISS_STALL_CYCLES) \ __PMC_EV(MIPS24K, L2_DMISS_STALL_CYCLES) \ __PMC_EV(MIPS24K, DMISS_CYCLES) \ __PMC_EV(MIPS24K, L2_MISS_CYCLES) \ __PMC_EV(MIPS24K, UNCACHED_BLOCK_CYCLES) \ __PMC_EV(MIPS24K, MDU_STALL_CYCLES) \ __PMC_EV(MIPS24K, FPU_STALL_CYCLES) \ __PMC_EV(MIPS24K, CP2_STALL_CYCLES) \ __PMC_EV(MIPS24K, COREXTEND_STALL_CYCLES) \ __PMC_EV(MIPS24K, ISPRAM_STALL_CYCLES) \ __PMC_EV(MIPS24K, DSPRAM_STALL_CYCLES) \ __PMC_EV(MIPS24K, CACHE_STALL_CYCLES) \ __PMC_EV(MIPS24K, LOAD_TO_USE_STALLS) \ __PMC_EV(MIPS24K, BASE_MISPRED_STALLS) \ __PMC_EV(MIPS24K, CPO_READ_STALLS) \ __PMC_EV(MIPS24K, BRANCH_MISPRED_CYCLES) \ __PMC_EV(MIPS24K, IFETCH_BUFFER_FULL) \ __PMC_EV(MIPS24K, FETCH_BUFFER_ALLOCATED) \ __PMC_EV(MIPS24K, EJTAG_ITRIGGER) \ __PMC_EV(MIPS24K, EJTAG_DTRIGGER) \ __PMC_EV(MIPS24K, FSB_LT_QUARTER) \ __PMC_EV(MIPS24K, FSB_QUARTER_TO_HALF) \ __PMC_EV(MIPS24K, FSB_GT_HALF) \ __PMC_EV(MIPS24K, FSB_FULL_PIPELINE_STALLS)\ __PMC_EV(MIPS24K, LDQ_LT_QUARTER) \ __PMC_EV(MIPS24K, LDQ_QUARTER_TO_HALF) \ __PMC_EV(MIPS24K, LDQ_GT_HALF) \ __PMC_EV(MIPS24K, LDQ_FULL_PIPELINE_STALLS)\ __PMC_EV(MIPS24K, WBB_LT_QUARTER) \ __PMC_EV(MIPS24K, WBB_QUARTER_TO_HALF) \ __PMC_EV(MIPS24K, WBB_GT_HALF) \ __PMC_EV(MIPS24K, WBB_FULL_PIPELINE_STALLS) \ __PMC_EV(MIPS24K, REQUEST_LATENCY) \ __PMC_EV(MIPS24K, REQUEST_COUNT) #define PMC_EV_MIPS24K_FIRST PMC_EV_MIPS24K_CYCLE #define PMC_EV_MIPS24K_LAST PMC_EV_MIPS24K_WBB_FULL_PIPELINE_STALLS /* * MIPS74k events. Similar to MIPS24k, the arrangement * is (0,2) then (1,3) events. 
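 * That is, as with the 24K list above, an event counted on counters 0 and 2
 * is listed immediately before its counterpart counted on counters 1 and 3.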
*/ #define __PMC_EV_MIPS74K() \ __PMC_EV(MIPS74K, CYCLES) \ __PMC_EV(MIPS74K, INSTR_EXECUTED) \ __PMC_EV(MIPS74K, PREDICTED_JR_31) \ __PMC_EV(MIPS74K, JR_31_MISPREDICTIONS) \ __PMC_EV(MIPS74K, REDIRECT_STALLS) \ __PMC_EV(MIPS74K, JR_31_NO_PREDICTIONS) \ __PMC_EV(MIPS74K, ITLB_ACCESSES) \ __PMC_EV(MIPS74K, ITLB_MISSES) \ __PMC_EV(MIPS74K, JTLB_INSN_MISSES) \ __PMC_EV(MIPS74K, ICACHE_ACCESSES) \ __PMC_EV(MIPS74K, ICACHE_MISSES) \ __PMC_EV(MIPS74K, ICACHE_MISS_STALLS) \ __PMC_EV(MIPS74K, UNCACHED_IFETCH_STALLS) \ __PMC_EV(MIPS74K, PDTRACE_BACK_STALLS) \ __PMC_EV(MIPS74K, IFU_REPLAYS) \ __PMC_EV(MIPS74K, KILLED_FETCH_SLOTS) \ __PMC_EV(MIPS74K, IFU_IDU_MISS_PRED_UPSTREAM_CYCLES) \ __PMC_EV(MIPS74K, IFU_IDU_NO_FETCH_CYCLES) \ __PMC_EV(MIPS74K, IFU_IDU_CLOGED_DOWNSTREAM_CYCLES) \ __PMC_EV(MIPS74K, DDQ0_FULL_DR_STALLS) \ __PMC_EV(MIPS74K, DDQ1_FULL_DR_STALLS) \ __PMC_EV(MIPS74K, ALCB_FULL_DR_STALLS) \ __PMC_EV(MIPS74K, AGCB_FULL_DR_STALLS) \ __PMC_EV(MIPS74K, CLDQ_FULL_DR_STALLS) \ __PMC_EV(MIPS74K, IODQ_FULL_DR_STALLS) \ __PMC_EV(MIPS74K, ALU_EMPTY_CYCLES) \ __PMC_EV(MIPS74K, AGEN_EMPTY_CYCLES) \ __PMC_EV(MIPS74K, ALU_OPERANDS_NOT_READY_CYCLES) \ __PMC_EV(MIPS74K, AGEN_OPERANDS_NOT_READY_CYCLES) \ __PMC_EV(MIPS74K, ALU_NO_ISSUES_CYCLES) \ __PMC_EV(MIPS74K, AGEN_NO_ISSUES_CYCLES) \ __PMC_EV(MIPS74K, ALU_BUBBLE_CYCLES) \ __PMC_EV(MIPS74K, AGEN_BUBBLE_CYCLES) \ __PMC_EV(MIPS74K, SINGLE_ISSUE_CYCLES) \ __PMC_EV(MIPS74K, DUAL_ISSUE_CYCLES) \ __PMC_EV(MIPS74K, OOO_ALU_ISSUE_CYCLES) \ __PMC_EV(MIPS74K, OOO_AGEN_ISSUE_CYCLES) \ __PMC_EV(MIPS74K, JALR_JALR_HB_INSNS) \ __PMC_EV(MIPS74K, DCACHE_LINE_REFILL_REQUESTS) \ __PMC_EV(MIPS74K, DCACHE_LOAD_ACCESSES) \ __PMC_EV(MIPS74K, DCACHE_ACCESSES) \ __PMC_EV(MIPS74K, DCACHE_WRITEBACKS) \ __PMC_EV(MIPS74K, DCACHE_MISSES) \ __PMC_EV(MIPS74K, JTLB_DATA_ACCESSES) \ __PMC_EV(MIPS74K, JTLB_DATA_MISSES) \ __PMC_EV(MIPS74K, LOAD_STORE_REPLAYS) \ __PMC_EV(MIPS74K, VA_TRANSALTION_CORNER_CASES) \ __PMC_EV(MIPS74K, LOAD_STORE_BLOCKED_CYCLES) \ __PMC_EV(MIPS74K, LOAD_STORE_NO_FILL_REQUESTS) \ __PMC_EV(MIPS74K, L2_CACHE_WRITEBACKS) \ __PMC_EV(MIPS74K, L2_CACHE_ACCESSES) \ __PMC_EV(MIPS74K, L2_CACHE_MISSES) \ __PMC_EV(MIPS74K, L2_CACHE_MISS_CYCLES) \ __PMC_EV(MIPS74K, FSB_FULL_STALLS) \ __PMC_EV(MIPS74K, FSB_OVER_50_FULL) \ __PMC_EV(MIPS74K, LDQ_FULL_STALLS) \ __PMC_EV(MIPS74K, LDQ_OVER_50_FULL) \ __PMC_EV(MIPS74K, WBB_FULL_STALLS) \ __PMC_EV(MIPS74K, WBB_OVER_50_FULL) \ __PMC_EV(MIPS74K, LOAD_MISS_CONSUMER_REPLAYS) \ __PMC_EV(MIPS74K, CP1_CP2_LOAD_INSNS) \ __PMC_EV(MIPS74K, JR_NON_31_INSNS) \ __PMC_EV(MIPS74K, MISPREDICTED_JR_31_INSNS) \ __PMC_EV(MIPS74K, BRANCH_INSNS) \ __PMC_EV(MIPS74K, CP1_CP2_COND_BRANCH_INSNS) \ __PMC_EV(MIPS74K, BRANCH_LIKELY_INSNS) \ __PMC_EV(MIPS74K, MISPREDICTED_BRANCH_LIKELY_INSNS) \ __PMC_EV(MIPS74K, COND_BRANCH_INSNS) \ __PMC_EV(MIPS74K, MISPREDICTED_BRANCH_INSNS) \ __PMC_EV(MIPS74K, INTEGER_INSNS) \ __PMC_EV(MIPS74K, FPU_INSNS) \ __PMC_EV(MIPS74K, LOAD_INSNS) \ __PMC_EV(MIPS74K, STORE_INSNS) \ __PMC_EV(MIPS74K, J_JAL_INSNS) \ __PMC_EV(MIPS74K, MIPS16_INSNS) \ __PMC_EV(MIPS74K, NOP_INSNS) \ __PMC_EV(MIPS74K, NT_MUL_DIV_INSNS) \ __PMC_EV(MIPS74K, DSP_INSNS) \ __PMC_EV(MIPS74K, ALU_DSP_SATURATION_INSNS) \ __PMC_EV(MIPS74K, DSP_BRANCH_INSNS) \ __PMC_EV(MIPS74K, MDU_DSP_SATURATION_INSNS) \ __PMC_EV(MIPS74K, UNCACHED_LOAD_INSNS) \ __PMC_EV(MIPS74K, UNCACHED_STORE_INSNS) \ __PMC_EV(MIPS74K, EJTAG_INSN_TRIGGERS) \ __PMC_EV(MIPS74K, CP1_BRANCH_MISPREDICTIONS) \ __PMC_EV(MIPS74K, SC_INSNS) \ __PMC_EV(MIPS74K, FAILED_SC_INSNS) \ __PMC_EV(MIPS74K, 
PREFETCH_INSNS) \ __PMC_EV(MIPS74K, CACHE_HIT_PREFETCH_INSNS) \ __PMC_EV(MIPS74K, NO_INSN_CYCLES) \ __PMC_EV(MIPS74K, LOAD_MISS_INSNS) \ __PMC_EV(MIPS74K, ONE_INSN_CYCLES) \ __PMC_EV(MIPS74K, TWO_INSNS_CYCLES) \ __PMC_EV(MIPS74K, GFIFO_BLOCKED_CYCLES) \ __PMC_EV(MIPS74K, CP1_CP2_STORE_INSNS) \ __PMC_EV(MIPS74K, MISPREDICTION_STALLS) \ __PMC_EV(MIPS74K, MISPREDICTED_BRANCH_INSNS_CYCLES) \ __PMC_EV(MIPS74K, EXCEPTIONS_TAKEN) \ __PMC_EV(MIPS74K, GRADUATION_REPLAYS) \ __PMC_EV(MIPS74K, COREEXTEND_EVENTS) \ __PMC_EV(MIPS74K, ISPRAM_EVENTS) \ __PMC_EV(MIPS74K, DSPRAM_EVENTS) \ __PMC_EV(MIPS74K, L2_CACHE_SINGLE_BIT_ERRORS) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_0) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_1) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_2) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_3) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_4) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_5) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_6) \ __PMC_EV(MIPS74K, SYSTEM_EVENT_7) \ __PMC_EV(MIPS74K, OCP_ALL_REQUESTS) \ __PMC_EV(MIPS74K, OCP_ALL_CACHEABLE_REQUESTS) \ __PMC_EV(MIPS74K, OCP_READ_REQUESTS) \ __PMC_EV(MIPS74K, OCP_READ_CACHEABLE_REQUESTS) \ __PMC_EV(MIPS74K, OCP_WRITE_REQUESTS) \ __PMC_EV(MIPS74K, OCP_WRITE_CACHEABLE_REQUESTS) \ __PMC_EV(MIPS74K, FSB_LESS_25_FULL) \ __PMC_EV(MIPS74K, FSB_25_50_FULL) \ __PMC_EV(MIPS74K, LDQ_LESS_25_FULL) \ __PMC_EV(MIPS74K, LDQ_25_50_FULL) \ __PMC_EV(MIPS74K, WBB_LESS_25_FULL) \ __PMC_EV(MIPS74K, WBB_25_50_FULL) #define PMC_EV_MIPS74K_FIRST PMC_EV_MIPS74K_CYCLES #define PMC_EV_MIPS74K_LAST PMC_EV_MIPS74K_WBB_25_50_FULL +#define __PMC_EV_BERI() \ + __PMC_EV(BERI, CYCLE) \ + __PMC_EV(BERI, INST) \ + __PMC_EV(BERI, INST_USER) \ + __PMC_EV(BERI, INST_KERNEL) \ + __PMC_EV(BERI, IMPRECISE_SETBOUNDS) \ + __PMC_EV(BERI, UNREPRESENTABLE_CAPS) \ + __PMC_EV(BERI, ITLB_MISS) \ + __PMC_EV(BERI, DTLB_MISS) \ + __PMC_EV(BERI, ICACHE_WRITE_HIT) \ + __PMC_EV(BERI, ICACHE_WRITE_MISS) \ + __PMC_EV(BERI, ICACHE_READ_HIT) \ + __PMC_EV(BERI, ICACHE_READ_MISS) \ + __PMC_EV(BERI, ICACHE_EVICT) \ + __PMC_EV(BERI, DCACHE_WRITE_HIT) \ + __PMC_EV(BERI, DCACHE_WRITE_MISS) \ + __PMC_EV(BERI, DCACHE_READ_HIT) \ + __PMC_EV(BERI, DCACHE_READ_MISS) \ + __PMC_EV(BERI, DCACHE_EVICT) \ + __PMC_EV(BERI, DCACHE_SET_TAG_WRITE) \ + __PMC_EV(BERI, DCACHE_SET_TAG_READ) \ + __PMC_EV(BERI, L2CACHE_WRITE_HIT) \ + __PMC_EV(BERI, L2CACHE_WRITE_MISS) \ + __PMC_EV(BERI, L2CACHE_READ_HIT) \ + __PMC_EV(BERI, L2CACHE_READ_MISS) \ + __PMC_EV(BERI, L2CACHE_EVICT) \ + __PMC_EV(BERI, L2CACHE_SET_TAG_WRITE) \ + __PMC_EV(BERI, L2CACHE_SET_TAG_READ) \ + __PMC_EV(BERI, MEM_BYTE_READ) \ + __PMC_EV(BERI, MEM_BYTE_WRITE) \ + __PMC_EV(BERI, MEM_HWORD_READ) \ + __PMC_EV(BERI, MEM_HWORD_WRITE) \ + __PMC_EV(BERI, MEM_WORD_READ) \ + __PMC_EV(BERI, MEM_WORD_WRITE) \ + __PMC_EV(BERI, MEM_DWORD_READ) \ + __PMC_EV(BERI, MEM_DWORD_WRITE) \ + __PMC_EV(BERI, MEM_CAP_READ) \ + __PMC_EV(BERI, MEM_CAP_WRITE) \ + __PMC_EV(BERI, MEM_CAP_READ_TAG_SET) \ + __PMC_EV(BERI, MEM_CAP_WRITE_TAG_SET) \ + __PMC_EV(BERI, TAGCACHE_WRITE_HIT) \ + __PMC_EV(BERI, TAGCACHE_WRITE_MISS) \ + __PMC_EV(BERI, TAGCACHE_READ_HIT) \ + __PMC_EV(BERI, TAGCACHE_READ_MISS) \ + __PMC_EV(BERI, TAGCACHE_EVICT) \ + __PMC_EV(BERI, L2CACHEMASTER_READ_REQ) \ + __PMC_EV(BERI, L2CACHEMASTER_WRITE_REQ) \ + __PMC_EV(BERI, L2CACHEMASTER_WRITE_REQ_FLIT) \ + __PMC_EV(BERI, L2CACHEMASTER_READ_RSP) \ + __PMC_EV(BERI, L2CACHEMASTER_READ_RSP_FLIT) \ + __PMC_EV(BERI, L2CACHEMASTER_WRITE_RSP) \ + __PMC_EV(BERI, TAGCACHEMASTER_READ_REQ) \ + __PMC_EV(BERI, TAGCACHEMASTER_WRITE_REQ) \ + __PMC_EV(BERI, TAGCACHEMASTER_WRITE_REQ_FLIT) \ + __PMC_EV(BERI, 
TAGCACHEMASTER_READ_RSP) \ + __PMC_EV(BERI, TAGCACHEMASTER_READ_RSP_FLIT) \ + __PMC_EV(BERI, TAGCACHEMASTER_WRITE_RSP) + +#define PMC_EV_BERI_FIRST PMC_EV_BERI_CYCLE +#define PMC_EV_BERI_LAST PMC_EV_BERI_TAGCACHEMASTER_WRITE_RSP + /* * Cavium Octeon counters. Obtained from cvmx-core.h */ #define __PMC_EV_OCTEON() \ __PMC_EV(OCTEON, CLK) \ __PMC_EV(OCTEON, ISSUE) \ __PMC_EV(OCTEON, RET) \ __PMC_EV(OCTEON, NISSUE) \ __PMC_EV(OCTEON, SISSUE) \ __PMC_EV(OCTEON, DISSUE) \ __PMC_EV(OCTEON, IFI) \ __PMC_EV(OCTEON, BR) \ __PMC_EV(OCTEON, BRMIS) \ __PMC_EV(OCTEON, J) \ __PMC_EV(OCTEON, JMIS) \ __PMC_EV(OCTEON, REPLAY) \ __PMC_EV(OCTEON, IUNA) \ __PMC_EV(OCTEON, TRAP) \ __PMC_EV(OCTEON, UULOAD) \ __PMC_EV(OCTEON, UUSTORE) \ __PMC_EV(OCTEON, ULOAD) \ __PMC_EV(OCTEON, USTORE) \ __PMC_EV(OCTEON, EC) \ __PMC_EV(OCTEON, MC) \ __PMC_EV(OCTEON, CC) \ __PMC_EV(OCTEON, CSRC) \ __PMC_EV(OCTEON, CFETCH) \ __PMC_EV(OCTEON, CPREF) \ __PMC_EV(OCTEON, ICA) \ __PMC_EV(OCTEON, II) \ __PMC_EV(OCTEON, IP) \ __PMC_EV(OCTEON, CIMISS) \ __PMC_EV(OCTEON, WBUF) \ __PMC_EV(OCTEON, WDAT) \ __PMC_EV(OCTEON, WBUFLD) \ __PMC_EV(OCTEON, WBUFFL) \ __PMC_EV(OCTEON, WBUFTR) \ __PMC_EV(OCTEON, BADD) \ __PMC_EV(OCTEON, BADDL2) \ __PMC_EV(OCTEON, BFILL) \ __PMC_EV(OCTEON, DDIDS) \ __PMC_EV(OCTEON, IDIDS) \ __PMC_EV(OCTEON, DIDNA) \ __PMC_EV(OCTEON, LDS) \ __PMC_EV(OCTEON, LMLDS) \ __PMC_EV(OCTEON, IOLDS) \ __PMC_EV(OCTEON, DMLDS) \ __PMC_EV(OCTEON, STS) \ __PMC_EV(OCTEON, LMSTS) \ __PMC_EV(OCTEON, IOSTS) \ __PMC_EV(OCTEON, IOBDMA) \ __PMC_EV(OCTEON, DTLB) \ __PMC_EV(OCTEON, DTLBAD) \ __PMC_EV(OCTEON, ITLB) \ __PMC_EV(OCTEON, SYNC) \ __PMC_EV(OCTEON, SYNCIOB) \ __PMC_EV(OCTEON, SYNCW) #define PMC_EV_OCTEON_FIRST PMC_EV_OCTEON_CLK #define PMC_EV_OCTEON_LAST PMC_EV_OCTEON_SYNCW #define __PMC_EV_PPC7450() \ __PMC_EV(PPC7450, CYCLE) \ __PMC_EV(PPC7450, INSTR_COMPLETED) \ __PMC_EV(PPC7450, TLB_BIT_TRANSITIONS) \ __PMC_EV(PPC7450, INSTR_DISPATCHED) \ __PMC_EV(PPC7450, PMON_EXCEPT) \ __PMC_EV(PPC7450, PMON_SIG) \ __PMC_EV(PPC7450, VPU_INSTR_COMPLETED) \ __PMC_EV(PPC7450, VFPU_INSTR_COMPLETED) \ __PMC_EV(PPC7450, VIU1_INSTR_COMPLETED) \ __PMC_EV(PPC7450, VIU2_INSTR_COMPLETED) \ __PMC_EV(PPC7450, MTVSCR_INSTR_COMPLETED) \ __PMC_EV(PPC7450, MTVRSAVE_INSTR_COMPLETED) \ __PMC_EV(PPC7450, VPU_INSTR_WAIT_CYCLES) \ __PMC_EV(PPC7450, VFPU_INSTR_WAIT_CYCLES) \ __PMC_EV(PPC7450, VIU1_INSTR_WAIT_CYCLES) \ __PMC_EV(PPC7450, VIU2_INSTR_WAIT_CYCLES) \ __PMC_EV(PPC7450, MFVSCR_SYNC_CYCLES) \ __PMC_EV(PPC7450, VSCR_SAT_SET) \ __PMC_EV(PPC7450, STORE_INSTR_COMPLETED) \ __PMC_EV(PPC7450, L1_INSTR_CACHE_MISSES) \ __PMC_EV(PPC7450, L1_DATA_SNOOPS) \ __PMC_EV(PPC7450, UNRESOLVED_BRANCHES) \ __PMC_EV(PPC7450, SPEC_BUFFER_CYCLES) \ __PMC_EV(PPC7450, BRANCH_UNIT_STALL_CYCLES) \ __PMC_EV(PPC7450, TRUE_BRANCH_TARGET_HITS) \ __PMC_EV(PPC7450, BRANCH_LINK_STAC_PREDICTED) \ __PMC_EV(PPC7450, GPR_ISSUE_QUEUE_DISPATCHES) \ __PMC_EV(PPC7450, CYCLES_THREE_INSTR_DISPATCHED) \ __PMC_EV(PPC7450, THRESHOLD_INSTR_QUEUE_ENTRIES_CYCLES) \ __PMC_EV(PPC7450, THRESHOLD_VEC_INSTR_QUEUE_ENTRIES_CYCLES) \ __PMC_EV(PPC7450, CYCLES_NO_COMPLETED_INSTRS) \ __PMC_EV(PPC7450, IU2_INSTR_COMPLETED) \ __PMC_EV(PPC7450, BRANCHES_COMPLETED) \ __PMC_EV(PPC7450, EIEIO_INSTR_COMPLETED) \ __PMC_EV(PPC7450, MTSPR_INSTR_COMPLETED) \ __PMC_EV(PPC7450, SC_INSTR_COMPLETED) \ __PMC_EV(PPC7450, LS_LM_COMPLETED) \ __PMC_EV(PPC7450, ITLB_HW_TABLE_SEARCH_CYCLES) \ __PMC_EV(PPC7450, DTLB_HW_SEARCH_CYCLES_OVER_THRESHOLD) \ __PMC_EV(PPC7450, L1_INSTR_CACHE_ACCESSES) \ __PMC_EV(PPC7450, INSTR_BKPT_MATCHES) \ 
__PMC_EV(PPC7450, L1_DATA_CACHE_LOAD_MISS_CYCLES_OVER_THRESHOLD)\ __PMC_EV(PPC7450, L1_DATA_SNOOP_HIT_ON_MODIFIED) \ __PMC_EV(PPC7450, LOAD_MISS_ALIAS) \ __PMC_EV(PPC7450, LOAD_MISS_ALIAS_ON_TOUCH) \ __PMC_EV(PPC7450, TOUCH_ALIAS) \ __PMC_EV(PPC7450, L1_DATA_SNOOP_HIT_CASTOUT_QUEUE) \ __PMC_EV(PPC7450, L1_DATA_SNOOP_HIT_CASTOUT) \ __PMC_EV(PPC7450, L1_DATA_SNOOP_HITS) \ __PMC_EV(PPC7450, WRITE_THROUGH_STORES) \ __PMC_EV(PPC7450, CACHE_INHIBITED_STORES) \ __PMC_EV(PPC7450, L1_DATA_LOAD_HIT) \ __PMC_EV(PPC7450, L1_DATA_TOUCH_HIT) \ __PMC_EV(PPC7450, L1_DATA_STORE_HIT) \ __PMC_EV(PPC7450, L1_DATA_TOTAL_HITS) \ __PMC_EV(PPC7450, DST_INSTR_DISPATCHED) \ __PMC_EV(PPC7450, REFRESHED_DSTS) \ __PMC_EV(PPC7450, SUCCESSFUL_DST_TABLE_SEARCHES) \ __PMC_EV(PPC7450, DSS_INSTR_COMPLETED) \ __PMC_EV(PPC7450, DST_STREAM_0_CACHE_LINE_FETCHES) \ __PMC_EV(PPC7450, VTQ_SUSPENDS_DUE_TO_CTX_CHANGE) \ __PMC_EV(PPC7450, VTQ_LINE_FETCH_HIT) \ __PMC_EV(PPC7450, VEC_LOAD_INSTR_COMPLETED) \ __PMC_EV(PPC7450, FP_STORE_INSTR_COMPLETED_IN_LSU) \ __PMC_EV(PPC7450, FPU_RENORMALIZATION) \ __PMC_EV(PPC7450, FPU_DENORMALIZATION) \ __PMC_EV(PPC7450, FP_STORE_CAUSES_STALL_IN_LSU) \ __PMC_EV(PPC7450, LD_ST_TRUE_ALIAS_STALL) \ __PMC_EV(PPC7450, LSU_INDEXED_ALIAS_STALL) \ __PMC_EV(PPC7450, LSU_ALIAS_VS_FSQ_WB0_WB1) \ __PMC_EV(PPC7450, LSU_ALIAS_VS_CSQ) \ __PMC_EV(PPC7450, LSU_LOAD_HIT_LINE_ALIAS_VS_CSQ0) \ __PMC_EV(PPC7450, LSU_LOAD_MISS_LINE_ALIAS_VS_CSQ0) \ __PMC_EV(PPC7450, LSU_TOUCH_LINE_ALIAS_VS_FSQ_WB0_WB1) \ __PMC_EV(PPC7450, LSU_TOUCH_ALIAS_VS_CSQ) \ __PMC_EV(PPC7450, LSU_LMQ_FULL_STALL) \ __PMC_EV(PPC7450, FP_LOAD_INSTR_COMPLETED_IN_LSU) \ __PMC_EV(PPC7450, FP_LOAD_SINGLE_INSTR_COMPLETED_IN_LSU) \ __PMC_EV(PPC7450, FP_LOAD_DOUBLE_COMPLETED_IN_LSU) \ __PMC_EV(PPC7450, LSU_RA_LATCH_STALL) \ __PMC_EV(PPC7450, LSU_LOAD_VS_STORE_QUEUE_ALIAS_STALL) \ __PMC_EV(PPC7450, LSU_LMQ_INDEX_ALIAS) \ __PMC_EV(PPC7450, LSU_STORE_QUEUE_INDEX_ALIAS) \ __PMC_EV(PPC7450, LSU_CSQ_FORWARDING) \ __PMC_EV(PPC7450, LSU_MISALIGNED_LOAD_FINISH) \ __PMC_EV(PPC7450, LSU_MISALIGN_STORE_COMPLETED) \ __PMC_EV(PPC7450, LSU_MISALIGN_STALL) \ __PMC_EV(PPC7450, FP_ONE_QUARTER_FPSCR_RENAMES_BUSY) \ __PMC_EV(PPC7450, FP_ONE_HALF_FPSCR_RENAMES_BUSY) \ __PMC_EV(PPC7450, FP_THREE_QUARTERS_FPSCR_RENAMES_BUSY) \ __PMC_EV(PPC7450, FP_ALL_FPSCR_RENAMES_BUSY) \ __PMC_EV(PPC7450, FP_DENORMALIZED_RESULT) \ __PMC_EV(PPC7450, L1_DATA_TOTAL_MISSES) \ __PMC_EV(PPC7450, DISPATCHES_TO_FPR_ISSUE_QUEUE) \ __PMC_EV(PPC7450, LSU_INSTR_COMPLETED) \ __PMC_EV(PPC7450, LOAD_INSTR_COMPLETED) \ __PMC_EV(PPC7450, SS_SM_INSTR_COMPLETED) \ __PMC_EV(PPC7450, TLBIE_INSTR_COMPLETED) \ __PMC_EV(PPC7450, LWARX_INSTR_COMPLETED) \ __PMC_EV(PPC7450, MFSPR_INSTR_COMPLETED) \ __PMC_EV(PPC7450, REFETCH_SERIALIZATION) \ __PMC_EV(PPC7450, COMPLETION_QUEUE_ENTRIES_OVER_THRESHOLD) \ __PMC_EV(PPC7450, CYCLES_ONE_INSTR_DISPATCHED) \ __PMC_EV(PPC7450, CYCLES_TWO_INSTR_COMPLETED) \ __PMC_EV(PPC7450, ITLB_NON_SPECULATIVE_MISSES) \ __PMC_EV(PPC7450, CYCLES_WAITING_FROM_L1_INSTR_CACHE_MISS) \ __PMC_EV(PPC7450, L1_DATA_LOAD_ACCESS_MISS) \ __PMC_EV(PPC7450, L1_DATA_TOUCH_MISS) \ __PMC_EV(PPC7450, L1_DATA_STORE_MISS) \ __PMC_EV(PPC7450, L1_DATA_TOUCH_MISS_CYCLES) \ __PMC_EV(PPC7450, L1_DATA_CYCLES_USED) \ __PMC_EV(PPC7450, DST_STREAM_1_CACHE_LINE_FETCHES) \ __PMC_EV(PPC7450, VTQ_STREAM_CANCELED_PREMATURELY) \ __PMC_EV(PPC7450, VTQ_RESUMES_DUE_TO_CTX_CHANGE) \ __PMC_EV(PPC7450, VTQ_LINE_FETCH_MISS) \ __PMC_EV(PPC7450, VTQ_LINE_FETCH) \ __PMC_EV(PPC7450, TLBIE_SNOOPS) \ __PMC_EV(PPC7450, L1_INSTR_CACHE_RELOADS) \ 
__PMC_EV(PPC7450, L1_DATA_CACHE_RELOADS) \ __PMC_EV(PPC7450, L1_DATA_CACHE_CASTOUTS_TO_L2) \ __PMC_EV(PPC7450, STORE_MERGE_GATHER) \ __PMC_EV(PPC7450, CACHEABLE_STORE_MERGE_TO_32_BYTES) \ __PMC_EV(PPC7450, DATA_BKPT_MATCHES) \ __PMC_EV(PPC7450, FALL_THROUGH_BRANCHES_PROCESSED) \ __PMC_EV(PPC7450, \ FIRST_SPECULATIVE_BRANCH_BUFFER_RESOLVED_CORRECTLY) \ __PMC_EV(PPC7450, SECOND_SPECULATION_BUFFER_ACTIVE) \ __PMC_EV(PPC7450, BPU_STALL_ON_LR_DEPENDENCY) \ __PMC_EV(PPC7450, BTIC_MISS) \ __PMC_EV(PPC7450, BRANCH_LINK_STACK_CORRECTLY_RESOLVED) \ __PMC_EV(PPC7450, FPR_ISSUE_STALLED) \ __PMC_EV(PPC7450, SWITCHES_BETWEEN_PRIV_USER) \ __PMC_EV(PPC7450, LSU_COMPLETES_FP_STORE_SINGLE) \ __PMC_EV(PPC7450, VR_ISSUE_QUEUE_DISPATCHES) \ __PMC_EV(PPC7450, VR_STALLS) \ __PMC_EV(PPC7450, GPR_RENAME_BUFFER_ENTRIES_OVER_THRESHOLD) \ __PMC_EV(PPC7450, FPR_ISSUE_QUEUE_ENTRIES) \ __PMC_EV(PPC7450, FPU_INSTR_COMPLETED) \ __PMC_EV(PPC7450, STWCX_INSTR_COMPLETED) \ __PMC_EV(PPC7450, LS_LM_INSTR_PIECES) \ __PMC_EV(PPC7450, ITLB_HW_SEARCH_CYCLES_OVER_THRESHOLD) \ __PMC_EV(PPC7450, DTLB_MISSES) \ __PMC_EV(PPC7450, CANCELLED_L1_INSTR_CACHE_MISSES) \ __PMC_EV(PPC7450, L1_DATA_CACHE_OP_HIT) \ __PMC_EV(PPC7450, L1_DATA_LOAD_MISS_CYCLES) \ __PMC_EV(PPC7450, L1_DATA_PUSHES) \ __PMC_EV(PPC7450, L1_DATA_TOTAL_MISS) \ __PMC_EV(PPC7450, VT2_FETCHES) \ __PMC_EV(PPC7450, TAKEN_BRANCHES_PROCESSED) \ __PMC_EV(PPC7450, BRANCH_FLUSHES) \ __PMC_EV(PPC7450, \ SECOND_SPECULATIVE_BRANCH_BUFFER_RESOLVED_CORRECTLY) \ __PMC_EV(PPC7450, THIRD_SPECULATION_BUFFER_ACTIVE) \ __PMC_EV(PPC7450, BRANCH_UNIT_STALL_ON_CTR_DEPENDENCY) \ __PMC_EV(PPC7450, FAST_BTIC_HIT) \ __PMC_EV(PPC7450, BRANCH_LINK_STACK_MISPREDICTED) \ __PMC_EV(PPC7450, CYCLES_THREE_INSTR_COMPLETED) \ __PMC_EV(PPC7450, CYCLES_NO_INSTR_DISPATCHED) \ __PMC_EV(PPC7450, GPR_ISSUE_QUEUE_ENTRIES_OVER_THRESHOLD) \ __PMC_EV(PPC7450, GPR_ISSUE_QUEUE_STALLED) \ __PMC_EV(PPC7450, IU1_INSTR_COMPLETED) \ __PMC_EV(PPC7450, DSSALL_INSTR_COMPLETED) \ __PMC_EV(PPC7450, TLBSYNC_INSTR_COMPLETED) \ __PMC_EV(PPC7450, SYNC_INSTR_COMPLETED) \ __PMC_EV(PPC7450, SS_SM_INSTR_PIECES) \ __PMC_EV(PPC7450, DTLB_HW_SEARCH_CYCLES) \ __PMC_EV(PPC7450, SNOOP_RETRIES) \ __PMC_EV(PPC7450, SUCCESSFUL_STWCX) \ __PMC_EV(PPC7450, DST_STREAM_3_CACHE_LINE_FETCHES) \ __PMC_EV(PPC7450, \ THIRD_SPECULATIVE_BRANCH_BUFFER_RESOLVED_CORRECTLY) \ __PMC_EV(PPC7450, MISPREDICTED_BRANCHES) \ __PMC_EV(PPC7450, FOLDED_BRANCHES) \ __PMC_EV(PPC7450, FP_STORE_DOUBLE_COMPLETES_IN_LSU) \ __PMC_EV(PPC7450, L2_CACHE_HITS) \ __PMC_EV(PPC7450, L3_CACHE_HITS) \ __PMC_EV(PPC7450, L2_INSTR_CACHE_MISSES) \ __PMC_EV(PPC7450, L3_INSTR_CACHE_MISSES) \ __PMC_EV(PPC7450, L2_DATA_CACHE_MISSES) \ __PMC_EV(PPC7450, L3_DATA_CACHE_MISSES) \ __PMC_EV(PPC7450, L2_LOAD_HITS) \ __PMC_EV(PPC7450, L2_STORE_HITS) \ __PMC_EV(PPC7450, L3_LOAD_HITS) \ __PMC_EV(PPC7450, L3_STORE_HITS) \ __PMC_EV(PPC7450, L2_TOUCH_HITS) \ __PMC_EV(PPC7450, L3_TOUCH_HITS) \ __PMC_EV(PPC7450, SNOOP_MODIFIED) \ __PMC_EV(PPC7450, SNOOP_VALID) \ __PMC_EV(PPC7450, INTERVENTION) \ __PMC_EV(PPC7450, L2_CACHE_MISSES) \ __PMC_EV(PPC7450, L3_CACHE_MISSES) \ __PMC_EV(PPC7450, L2_CACHE_CASTOUTS) \ __PMC_EV(PPC7450, L3_CACHE_CASTOUTS) \ __PMC_EV(PPC7450, L2SQ_FULL_CYCLES) \ __PMC_EV(PPC7450, L3SQ_FULL_CYCLES) \ __PMC_EV(PPC7450, RAQ_FULL_CYCLES) \ __PMC_EV(PPC7450, WAQ_FULL_CYCLES) \ __PMC_EV(PPC7450, L1_EXTERNAL_INTERVENTIONS) \ __PMC_EV(PPC7450, L2_EXTERNAL_INTERVENTIONS) \ __PMC_EV(PPC7450, L3_EXTERNAL_INTERVENTIONS) \ __PMC_EV(PPC7450, EXTERNAL_INTERVENTIONS) \ __PMC_EV(PPC7450, EXTERNAL_PUSHES) \ 
__PMC_EV(PPC7450, EXTERNAL_SNOOP_RETRY) \ __PMC_EV(PPC7450, DTQ_FULL_CYCLES) \ __PMC_EV(PPC7450, BUS_RETRY) \ __PMC_EV(PPC7450, L2_VALID_REQUEST) \ __PMC_EV(PPC7450, BORDQ_FULL) \ __PMC_EV(PPC7450, BUS_TAS_FOR_READS) \ __PMC_EV(PPC7450, BUS_TAS_FOR_WRITES) \ __PMC_EV(PPC7450, BUS_READS_NOT_RETRIED) \ __PMC_EV(PPC7450, BUS_WRITES_NOT_RETRIED) \ __PMC_EV(PPC7450, BUS_READS_WRITES_NOT_RETRIED) \ __PMC_EV(PPC7450, BUS_RETRY_DUE_TO_L1_RETRY) \ __PMC_EV(PPC7450, BUS_RETRY_DUE_TO_PREVIOUS_ADJACENT) \ __PMC_EV(PPC7450, BUS_RETRY_DUE_TO_COLLISION) \ __PMC_EV(PPC7450, BUS_RETRY_DUE_TO_INTERVENTION_ORDERING) \ __PMC_EV(PPC7450, SNOOP_REQUESTS) \ __PMC_EV(PPC7450, PREFETCH_ENGINE_REQUEST) \ __PMC_EV(PPC7450, PREFETCH_ENGINE_COLLISION_VS_LOAD) \ __PMC_EV(PPC7450, PREFETCH_ENGINE_COLLISION_VS_STORE) \ __PMC_EV(PPC7450, PREFETCH_ENGINE_COLLISION_VS_INSTR_FETCH) \ __PMC_EV(PPC7450, \ PREFETCH_ENGINE_COLLISION_VS_LOAD_STORE_INSTR_FETCH) \ __PMC_EV(PPC7450, PREFETCH_ENGINE_FULL) #define PMC_EV_PPC7450_FIRST PMC_EV_PPC7450_CYCLE #define PMC_EV_PPC7450_LAST PMC_EV_PPC7450_PREFETCH_ENGINE_FULL #define __PMC_EV_PPC970() \ __PMC_EV(PPC970, INSTR_COMPLETED) \ __PMC_EV(PPC970, MARKED_GROUP_DISPATCH) \ __PMC_EV(PPC970, MARKED_STORE_COMPLETED) \ __PMC_EV(PPC970, GCT_EMPTY) \ __PMC_EV(PPC970, RUN_CYCLES) \ __PMC_EV(PPC970, OVERFLOW) \ __PMC_EV(PPC970, CYCLES) \ __PMC_EV(PPC970, THRESHOLD_TIMEOUT) \ __PMC_EV(PPC970, GROUP_DISPATCH) \ __PMC_EV(PPC970, BR_MARKED_INSTR_FINISH) \ __PMC_EV(PPC970, GCT_EMPTY_BY_SRQ_FULL) \ __PMC_EV(PPC970, STOP_COMPLETION) \ __PMC_EV(PPC970, LSU_EMPTY) \ __PMC_EV(PPC970, MARKED_STORE_WITH_INTR) \ __PMC_EV(PPC970, CYCLES_IN_SUPER) \ __PMC_EV(PPC970, VPU_MARKED_INSTR_COMPLETED) \ __PMC_EV(PPC970, FXU0_IDLE_FXU1_BUSY) \ __PMC_EV(PPC970, SRQ_EMPTY) \ __PMC_EV(PPC970, MARKED_GROUP_COMPLETED) \ __PMC_EV(PPC970, CR_MARKED_INSTR_FINISH) \ __PMC_EV(PPC970, DISPATCH_SUCCESS) \ __PMC_EV(PPC970, FXU0_IDLE_FXU1_IDLE) \ __PMC_EV(PPC970, ONE_PLUS_INSTR_COMPLETED) \ __PMC_EV(PPC970, GROUP_MARKED_IDU) \ __PMC_EV(PPC970, MARKED_GROUP_COMPLETE_TIMEOUT) \ __PMC_EV(PPC970, FXU0_BUSY_FXU1_BUSY) \ __PMC_EV(PPC970, MARKED_STORE_SENT_TO_STS) \ __PMC_EV(PPC970, FXU_MARKED_INSTR_FINISHED) \ __PMC_EV(PPC970, MARKED_GROUP_ISSUED) \ __PMC_EV(PPC970, FXU0_BUSY_FXU1_IDLE) \ __PMC_EV(PPC970, GROUP_COMPLETED) \ __PMC_EV(PPC970, FPU_MARKED_INSTR_COMPLETED) \ __PMC_EV(PPC970, MARKED_INSTR_FINISH_ANY_UNIT) \ __PMC_EV(PPC970, EXTERNAL_INTERRUPT) \ __PMC_EV(PPC970, GROUP_DISPATCH_REJECT) \ __PMC_EV(PPC970, LSU_MARKED_INSTR_FINISH) \ __PMC_EV(PPC970, TIMEBASE_EVENT) \ __PMC_EV(PPC970, LSU_COMPLETION_STALL) \ __PMC_EV(PPC970, FXU_COMPLETION_STALL) \ __PMC_EV(PPC970, DCACHE_MISS_COMPLETION_STALL) \ __PMC_EV(PPC970, FPU_COMPLETION_STALL) \ __PMC_EV(PPC970, FXU_LONG_INSTR_COMPLETION_STALL) \ __PMC_EV(PPC970, REJECT_COMPLETION_STALL) \ __PMC_EV(PPC970, FPU_LONG_INSTR_COMPLETION_STALL) \ __PMC_EV(PPC970, GCT_EMPTY_BY_ICACHE_MISS) \ __PMC_EV(PPC970, REJECT_COMPLETION_STALL_ERAT_MISS) \ __PMC_EV(PPC970, GCT_EMPTY_BY_BRANCH_MISS_PREDICT) \ __PMC_EV(PPC970, BUS_HIGH) \ __PMC_EV(PPC970, BUS_LOW) \ __PMC_EV(PPC970, ADDER) #define PMC_EV_PPC970_FIRST PMC_EV_PPC970_INSTR_COMPLETED #define PMC_EV_PPC970_LAST PMC_EV_PPC970_ADDER #define __PMC_EV_E500() \ __PMC_EV(E500, CYCLES) \ __PMC_EV(E500, INSTR_COMPLETED) \ __PMC_EV(E500, UOPS_COMPLETED) \ __PMC_EV(E500, INSTR_FETCHED) \ __PMC_EV(E500, UOPS_DECODED) \ __PMC_EV(E500, PM_EVENT_TRANSITIONS) \ __PMC_EV(E500, PM_EVENT_CYCLES) \ __PMC_EV(E500, BRANCH_INSTRS_COMPLETED) \ __PMC_EV(E500, 
LOAD_UOPS_COMPLETED) \ __PMC_EV(E500, STORE_UOPS_COMPLETED) \ __PMC_EV(E500, CQ_REDIRECTS) \ __PMC_EV(E500, BRANCHES_FINISHED) \ __PMC_EV(E500, TAKEN_BRANCHES_FINISHED) \ __PMC_EV(E500, FINISHED_UNCOND_BRANCHES_MISS_BTB) \ __PMC_EV(E500, BRANCH_MISPRED) \ __PMC_EV(E500, BTB_BRANCH_MISPRED_FROM_DIRECTION) \ __PMC_EV(E500, BTB_HITS_PSEUDO_HITS) \ __PMC_EV(E500, CYCLES_DECODE_STALLED) \ __PMC_EV(E500, CYCLES_ISSUE_STALLED) \ __PMC_EV(E500, CYCLES_BRANCH_ISSUE_STALLED) \ __PMC_EV(E500, CYCLES_SU1_SCHED_STALLED) \ __PMC_EV(E500, CYCLES_SU2_SCHED_STALLED) \ __PMC_EV(E500, CYCLES_MU_SCHED_STALLED) \ __PMC_EV(E500, CYCLES_LRU_SCHED_STALLED) \ __PMC_EV(E500, CYCLES_BU_SCHED_STALLED) \ __PMC_EV(E500, TOTAL_TRANSLATED) \ __PMC_EV(E500, LOADS_TRANSLATED) \ __PMC_EV(E500, STORES_TRANSLATED) \ __PMC_EV(E500, TOUCHES_TRANSLATED) \ __PMC_EV(E500, CACHEOPS_TRANSLATED) \ __PMC_EV(E500, CACHE_INHIBITED_ACCESS_TRANSLATED) \ __PMC_EV(E500, GUARDED_LOADS_TRANSLATED) \ __PMC_EV(E500, WRITE_THROUGH_STORES_TRANSLATED) \ __PMC_EV(E500, MISALIGNED_LOAD_STORE_ACCESS_TRANSLATED) \ __PMC_EV(E500, TOTAL_ALLOCATED_TO_DLFB) \ __PMC_EV(E500, LOADS_TRANSLATED_ALLOCATED_TO_DLFB) \ __PMC_EV(E500, STORES_COMPLETED_ALLOCATED_TO_DLFB) \ __PMC_EV(E500, TOUCHES_TRANSLATED_ALLOCATED_TO_DLFB) \ __PMC_EV(E500, STORES_COMPLETED) \ __PMC_EV(E500, DATA_L1_CACHE_LOCKS) \ __PMC_EV(E500, DATA_L1_CACHE_RELOADS) \ __PMC_EV(E500, DATA_L1_CACHE_CASTOUTS) \ __PMC_EV(E500, LOAD_MISS_DLFB_FULL) \ __PMC_EV(E500, LOAD_MISS_LDQ_FULL) \ __PMC_EV(E500, LOAD_GUARDED_MISS) \ __PMC_EV(E500, STORE_TRANSLATE_WHEN_QUEUE_FULL) \ __PMC_EV(E500, ADDRESS_COLLISION) \ __PMC_EV(E500, DATA_MMU_MISS) \ __PMC_EV(E500, DATA_MMU_BUSY) \ __PMC_EV(E500, PART2_MISALIGNED_CACHE_ACCESS) \ __PMC_EV(E500, LOAD_MISS_DLFB_FULL_CYCLES) \ __PMC_EV(E500, LOAD_MISS_LDQ_FULL_CYCLES) \ __PMC_EV(E500, LOAD_GUARDED_MISS_CYCLES) \ __PMC_EV(E500, STORE_TRANSLATE_WHEN_QUEUE_FULL_CYCLES) \ __PMC_EV(E500, ADDRESS_COLLISION_CYCLES) \ __PMC_EV(E500, DATA_MMU_MISS_CYCLES) \ __PMC_EV(E500, DATA_MMU_BUSY_CYCLES) \ __PMC_EV(E500, PART2_MISALIGNED_CACHE_ACCESS_CYCLES) \ __PMC_EV(E500, INSTR_L1_CACHE_LOCKS) \ __PMC_EV(E500, INSTR_L1_CACHE_RELOADS) \ __PMC_EV(E500, INSTR_L1_CACHE_FETCHES) \ __PMC_EV(E500, INSTR_MMU_TLB4K_RELOADS) \ __PMC_EV(E500, INSTR_MMU_VSP_RELOADS) \ __PMC_EV(E500, DATA_MMU_TLB4K_RELOADS) \ __PMC_EV(E500, DATA_MMU_VSP_RELOADS) \ __PMC_EV(E500, L2MMU_MISSES) \ __PMC_EV(E500, BIU_MASTER_REQUESTS) \ __PMC_EV(E500, BIU_MASTER_INSTR_SIDE_REQUESTS) \ __PMC_EV(E500, BIU_MASTER_DATA_SIDE_REQUESTS) \ __PMC_EV(E500, BIU_MASTER_DATA_SIDE_CASTOUT_REQUESTS) \ __PMC_EV(E500, BIU_MASTER_RETRIES) \ __PMC_EV(E500, SNOOP_REQUESTS) \ __PMC_EV(E500, SNOOP_HITS) \ __PMC_EV(E500, SNOOP_PUSHES) \ __PMC_EV(E500, SNOOP_RETRIES) \ __PMC_EV(E500, DLFB_LOAD_MISS_CYCLES) \ __PMC_EV(E500, ILFB_FETCH_MISS_CYCLES) \ __PMC_EV(E500, EXT_INPU_INTR_LATENCY_CYCLES) \ __PMC_EV(E500, CRIT_INPUT_INTR_LATENCY_CYCLES) \ __PMC_EV(E500, EXT_INPUT_INTR_PENDING_LATENCY_CYCLES) \ __PMC_EV(E500, CRIT_INPUT_INTR_PENDING_LATENCY_CYCLES) \ __PMC_EV(E500, PMC0_OVERFLOW) \ __PMC_EV(E500, PMC1_OVERFLOW) \ __PMC_EV(E500, PMC2_OVERFLOW) \ __PMC_EV(E500, PMC3_OVERFLOW) \ __PMC_EV(E500, INTERRUPTS_TAKEN) \ __PMC_EV(E500, EXT_INPUT_INTR_TAKEN) \ __PMC_EV(E500, CRIT_INPUT_INTR_TAKEN) \ __PMC_EV(E500, SYSCALL_TRAP_INTR) \ __PMC_EV(E500, TLB_BIT_TRANSITIONS) \ __PMC_EV(E500, L2_LINEFILL_BUFFER) \ __PMC_EV(E500, LV2_VS) \ __PMC_EV(E500, CASTOUTS_RELEASED) \ __PMC_EV(E500, INTV_ALLOCATIONS) \ __PMC_EV(E500, DLFB_RETRIES_TO_MBAR) \ 
__PMC_EV(E500, STORE_RETRIES) \ __PMC_EV(E500, STASH_L1_HITS) \ __PMC_EV(E500, STASH_L2_HITS) \ __PMC_EV(E500, STASH_BUSY_1) \ __PMC_EV(E500, STASH_BUSY_2) \ __PMC_EV(E500, STASH_BUSY_3) \ __PMC_EV(E500, STASH_HITS) \ __PMC_EV(E500, STASH_HIT_DLFB) \ __PMC_EV(E500, STASH_REQUESTS) \ __PMC_EV(E500, STASH_REQUESTS_L1) \ __PMC_EV(E500, STASH_REQUESTS_L2) \ __PMC_EV(E500, STALLS_NO_CAQ_OR_COB) \ __PMC_EV(E500, L2_CACHE_ACCESSES) \ __PMC_EV(E500, L2_HIT_CACHE_ACCESSES) \ __PMC_EV(E500, L2_CACHE_DATA_ACCESSES) \ __PMC_EV(E500, L2_CACHE_DATA_HITS) \ __PMC_EV(E500, L2_CACHE_INSTR_ACCESSES) \ __PMC_EV(E500, L2_CACHE_INSTR_HITS) \ __PMC_EV(E500, L2_CACHE_ALLOCATIONS) \ __PMC_EV(E500, L2_CACHE_DATA_ALLOCATIONS) \ __PMC_EV(E500, L2_CACHE_DIRTY_DATA_ALLOCATIONS) \ __PMC_EV(E500, L2_CACHE_INSTR_ALLOCATIONS) \ __PMC_EV(E500, L2_CACHE_UPDATES) \ __PMC_EV(E500, L2_CACHE_CLEAN_UPDATES) \ __PMC_EV(E500, L2_CACHE_DIRTY_UPDATES) \ __PMC_EV(E500, L2_CACHE_CLEAN_REDUNDANT_UPDATES) \ __PMC_EV(E500, L2_CACHE_DIRTY_REDUNDANT_UPDATES) \ __PMC_EV(E500, L2_CACHE_LOCKS) \ __PMC_EV(E500, L2_CACHE_CASTOUTS) \ __PMC_EV(E500, L2_CACHE_DATA_DIRTY_HITS) \ __PMC_EV(E500, INSTR_LFB_WENT_HIGH_PRIORITY) \ __PMC_EV(E500, SNOOP_THROTTLING_TURNED_ON) \ __PMC_EV(E500, L2_CLEAN_LINE_INVALIDATIONS) \ __PMC_EV(E500, L2_INCOHERENT_LINE_INVALIDATIONS) \ __PMC_EV(E500, L2_COHERENT_LINE_INVALIDATIONS) \ __PMC_EV(E500, COHERENT_LOOKUP_MISS_DUE_TO_VALID_BUT_INCOHERENT_MATCHES) \ __PMC_EV(E500, IAC1S_DETECTED) \ __PMC_EV(E500, IAC2S_DETECTED) \ __PMC_EV(E500, DAC1S_DTECTED) \ __PMC_EV(E500, DAC2S_DTECTED) \ __PMC_EV(E500, DVT0_DETECTED) \ __PMC_EV(E500, DVT1_DETECTED) \ __PMC_EV(E500, DVT2_DETECTED) \ __PMC_EV(E500, DVT3_DETECTED) \ __PMC_EV(E500, DVT4_DETECTED) \ __PMC_EV(E500, DVT5_DETECTED) \ __PMC_EV(E500, DVT6_DETECTED) \ __PMC_EV(E500, DVT7_DETECTED) \ __PMC_EV(E500, CYCLES_COMPLETION_STALLED_NEXUS_FIFO_FULL) \ __PMC_EV(E500, FPU_DOUBLE_PUMP) \ __PMC_EV(E500, FPU_FINISH) \ __PMC_EV(E500, FPU_DIVIDE_CYCLES) \ __PMC_EV(E500, FPU_DENORM_INPUT_CYCLES) \ __PMC_EV(E500, FPU_RESULT_STALL_CYCLES) \ __PMC_EV(E500, FPU_FPSCR_FULL_STALL) \ __PMC_EV(E500, FPU_PIPE_SYNC_STALLS) \ __PMC_EV(E500, FPU_INPUT_DATA_STALLS) \ __PMC_EV(E500, DECORATED_LOADS) \ __PMC_EV(E500, DECORATED_STORES) \ __PMC_EV(E500, LOAD_RETRIES) \ __PMC_EV(E500, STWCX_SUCCESSES) \ __PMC_EV(E500, STWCX_FAILURES) \ #define PMC_EV_E500_FIRST PMC_EV_E500_CYCLES #define PMC_EV_E500_LAST PMC_EV_E500_STWCX_FAILURES /* * All known PMC events. * * PMC event numbers are allocated sparsely to allow new PMC events to * be added to a PMC class without breaking ABI compatibility. 
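 * (How these lists are consumed is left to the including file; a consumer
 * such as sys/sys/pmc.h is expected to build enum pmc_event by redefining
 * the X-macros roughly as
 *	#define __PMC_EV_BLOCK(C, V)	PMC_EV_ ## C ## __BLOCK_START = (V) - 1,
 *	#define __PMC_EV(C, N)		PMC_EV_ ## C ## _ ## N,
 * so that the first event of a class takes the block's start value, e.g.
 * PMC_EV_K7_DC_ACCESSES == 0x2000, and later events of that class follow
 * sequentially.  The definitions shown here are only an illustrative
 * sketch; the authoritative ones live outside this header.)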
The * current allocation scheme is: * * START #EVENTS DESCRIPTION * 0 0x1000 Reserved * 0x1000 0x0001 TSC * 0x2000 0x0080 AMD K7 events * 0x2080 0x0100 AMD K8 events * 0x10000 0x0080 INTEL architectural fixed-function events * 0x10080 0x0F80 INTEL architectural programmable events * 0x11000 0x0080 INTEL Pentium 4 events * 0x11080 0x0080 INTEL Pentium MMX events * 0x11100 0x0100 INTEL Pentium Pro/P-II/P-III/Pentium-M events * 0x11200 0x00FF INTEL XScale events * 0x11300 0x00FF MIPS 24K events * 0x11400 0x00FF Octeon events * 0x11500 0x00FF MIPS 74K events + * 0x11600 0x00FF BERI statcounters * 0x13000 0x00FF MPC7450 events * 0x13100 0x00FF IBM PPC970 events * 0x13300 0x00FF Freescale e500 events * 0x14000 0x0100 ARMv7 events * 0x14100 0x0100 ARMv8 events * 0x20000 0x1000 Software events */ #define __PMC_EVENTS() \ __PMC_EV_BLOCK(TSC, 0x01000) \ __PMC_EV_TSC() \ __PMC_EV_BLOCK(IAF, 0x10000) \ __PMC_EV_IAF() \ __PMC_EV_BLOCK(K7, 0x2000) \ __PMC_EV_K7() \ __PMC_EV_BLOCK(K8, 0x2080) \ __PMC_EV_K8() \ __PMC_EV_BLOCK(XSCALE, 0x11200) \ __PMC_EV_XSCALE() \ __PMC_EV_BLOCK(MIPS24K, 0x11300) \ __PMC_EV_MIPS24K() \ __PMC_EV_BLOCK(OCTEON, 0x11400) \ __PMC_EV_OCTEON() \ __PMC_EV_BLOCK(MIPS74K, 0x11500) \ __PMC_EV_MIPS74K() \ + __PMC_EV_BLOCK(BERI, 0x11600) \ + __PMC_EV_BERI() \ __PMC_EV_BLOCK(UCP, 0x12080) \ __PMC_EV_UCP() \ __PMC_EV_BLOCK(PPC7450, 0x13000) \ __PMC_EV_PPC7450() \ __PMC_EV_BLOCK(PPC970, 0x13100) \ __PMC_EV_PPC970() \ __PMC_EV_BLOCK(E500, 0x13300) \ __PMC_EV_E500() \ __PMC_EV_BLOCK(ARMV7, 0x14000) \ __PMC_EV_ARMV7() \ __PMC_EV_BLOCK(ARMV8, 0x14100) \ __PMC_EV_ARMV8() #define PMC_EVENT_FIRST PMC_EV_TSC_TSC #define PMC_EVENT_LAST PMC_EV_SOFT_LAST #endif /* _DEV_HWPMC_PMC_EVENTS_H_ */ Index: projects/clang900-import/sys/dev/vt/vt_core.c =================================================================== --- projects/clang900-import/sys/dev/vt/vt_core.c (revision 352536) +++ projects/clang900-import/sys/dev/vt/vt_core.c (revision 352537) @@ -1,2959 +1,2981 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009, 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Ed Schouten under sponsorship from the * FreeBSD Foundation. * * Portions of this software were developed by Oleksandr Rybalko * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) #include #include #endif static tc_bell_t vtterm_bell; static tc_cursor_t vtterm_cursor; static tc_putchar_t vtterm_putchar; static tc_fill_t vtterm_fill; static tc_copy_t vtterm_copy; static tc_pre_input_t vtterm_pre_input; static tc_post_input_t vtterm_post_input; static tc_param_t vtterm_param; static tc_done_t vtterm_done; static tc_cnprobe_t vtterm_cnprobe; static tc_cngetc_t vtterm_cngetc; static tc_cngrab_t vtterm_cngrab; static tc_cnungrab_t vtterm_cnungrab; static tc_opened_t vtterm_opened; static tc_ioctl_t vtterm_ioctl; static tc_mmap_t vtterm_mmap; const struct terminal_class vt_termclass = { .tc_bell = vtterm_bell, .tc_cursor = vtterm_cursor, .tc_putchar = vtterm_putchar, .tc_fill = vtterm_fill, .tc_copy = vtterm_copy, .tc_pre_input = vtterm_pre_input, .tc_post_input = vtterm_post_input, .tc_param = vtterm_param, .tc_done = vtterm_done, .tc_cnprobe = vtterm_cnprobe, .tc_cngetc = vtterm_cngetc, .tc_cngrab = vtterm_cngrab, .tc_cnungrab = vtterm_cnungrab, .tc_opened = vtterm_opened, .tc_ioctl = vtterm_ioctl, .tc_mmap = vtterm_mmap, }; /* * Use a constant timer of 25 Hz to redraw the screen. * * XXX: In theory we should only fire up the timer when there is really * activity. Unfortunately we cannot always start timers. We really * don't want to process kernel messages synchronously, because it * really slows down the system. */ #define VT_TIMERFREQ 25 /* Bell pitch/duration. */ #define VT_BELLDURATION ((5 * hz + 99) / 100) #define VT_BELLPITCH 800 #define VT_UNIT(vw) ((vw)->vw_device->vd_unit * VT_MAXWINDOWS + \ (vw)->vw_number) static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD, 0, "vt(9) parameters"); static VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)"); static VT_SYSCTL_INT(enable_bell, 1, "Enable bell"); static VT_SYSCTL_INT(debug, 0, "vt(9) debug level"); static VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode"); static VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend"); /* Allow to disable some keyboard combinations. */ static VT_SYSCTL_INT(kbd_halt, 1, "Enable halt keyboard combination. " "See kbdmap(5) to configure."); static VT_SYSCTL_INT(kbd_poweroff, 1, "Enable Power Off keyboard combination. " "See kbdmap(5) to configure."); static VT_SYSCTL_INT(kbd_reboot, 1, "Enable reboot keyboard combination. " "See kbdmap(5) to configure (typically Ctrl-Alt-Delete)."); static VT_SYSCTL_INT(kbd_debug, 1, "Enable key combination to enter debugger. " "See kbdmap(5) to configure (typically Ctrl-Alt-Esc)."); static VT_SYSCTL_INT(kbd_panic, 0, "Enable request to panic. " "See kbdmap(5) to configure."); /* Used internally, not a tunable. 
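A quick worked check of the timing constants defined above, assuming an illustrative kern.hz of 1000: the redraw callout fires every 1000 / VT_TIMERFREQ = 40 ms, and VT_BELLDURATION evaluates to (5 * 1000 + 99) / 100 = 50 ticks, i.e. about 50 ms of bell; the "+ 99" rounds up so the duration never truncates to zero at low hz values. The sketch below redoes that arithmetic (VT_BELLDURATION is written as a function-like macro here purely for illustration; in the driver it is an object-like macro using the global hz).

/* Illustrative only: how the vt(4) timing constants scale with hz. */
#include <stdio.h>

#define VT_TIMERFREQ		25
#define VT_BELLDURATION(hz)	((5 * (hz) + 99) / 100)

int
main(void)
{
	int hz = 1000;		/* assumed kern.hz */

	printf("redraw period: %d ms\n", 1000 / VT_TIMERFREQ);
	printf("bell duration: %d ticks (~%d ms)\n",
	    VT_BELLDURATION(hz), VT_BELLDURATION(hz) * 1000 / hz);
	return (0);
}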
*/ int vt_draw_logo_cpus; VT_SYSCTL_INT(splash_cpu, 0, "Show logo CPUs during boot"); VT_SYSCTL_INT(splash_ncpu, 0, "Override number of logos displayed " "(0 = do not override)"); VT_SYSCTL_INT(splash_cpu_style, 2, "Draw logo style " "(0 = Alternate beastie, 1 = Beastie, 2 = Orb)"); VT_SYSCTL_INT(splash_cpu_duration, 10, "Hide logos after (seconds)"); static unsigned int vt_unit = 0; static MALLOC_DEFINE(M_VT, "vt", "vt device"); struct vt_device *main_vd = &vt_consdev; /* Boot logo. */ extern unsigned int vt_logo_width; extern unsigned int vt_logo_height; extern unsigned int vt_logo_depth; extern unsigned char vt_logo_image[]; #ifndef DEV_SPLASH #define vtterm_draw_cpu_logos(...) do {} while (0) const unsigned int vt_logo_sprite_height; #endif /* Font. */ extern struct vt_font vt_font_default; #ifndef SC_NO_CUTPASTE extern struct vt_mouse_cursor vt_default_mouse_pointer; #endif static int signal_vt_rel(struct vt_window *); static int signal_vt_acq(struct vt_window *); static int finish_vt_rel(struct vt_window *, int, int *); static int finish_vt_acq(struct vt_window *); static int vt_window_switch(struct vt_window *); static int vt_late_window_switch(struct vt_window *); static int vt_proc_alive(struct vt_window *); static void vt_resize(struct vt_device *); static void vt_update_static(void *); #ifndef SC_NO_CUTPASTE static void vt_mouse_paste(void); #endif static void vt_suspend_handler(void *priv); static void vt_resume_handler(void *priv); SET_DECLARE(vt_drv_set, struct vt_driver); #define _VTDEFH MAX(100, PIXEL_HEIGHT(VT_FB_MAX_HEIGHT)) #define _VTDEFW MAX(200, PIXEL_WIDTH(VT_FB_MAX_WIDTH)) struct terminal vt_consterm; static struct vt_window vt_conswindow; #ifndef SC_NO_CONSDRAWN static term_char_t vt_consdrawn[PIXEL_HEIGHT(VT_FB_MAX_HEIGHT) * PIXEL_WIDTH(VT_FB_MAX_WIDTH)]; static term_color_t vt_consdrawnfg[PIXEL_HEIGHT(VT_FB_MAX_HEIGHT) * PIXEL_WIDTH(VT_FB_MAX_WIDTH)]; static term_color_t vt_consdrawnbg[PIXEL_HEIGHT(VT_FB_MAX_HEIGHT) * PIXEL_WIDTH(VT_FB_MAX_WIDTH)]; #endif struct vt_device vt_consdev = { .vd_driver = NULL, .vd_softc = NULL, .vd_prev_driver = NULL, .vd_prev_softc = NULL, .vd_flags = VDF_INVALID, .vd_windows = { [VT_CONSWINDOW] = &vt_conswindow, }, .vd_curwindow = &vt_conswindow, .vd_kbstate = 0, #ifndef SC_NO_CUTPASTE .vd_pastebuf = { .vpb_buf = NULL, .vpb_bufsz = 0, .vpb_len = 0 }, .vd_mcursor = &vt_default_mouse_pointer, .vd_mcursor_fg = TC_WHITE, .vd_mcursor_bg = TC_BLACK, #endif #ifndef SC_NO_CONSDRAWN .vd_drawn = vt_consdrawn, .vd_drawnfg = vt_consdrawnfg, .vd_drawnbg = vt_consdrawnbg, #endif }; static term_char_t vt_constextbuf[(_VTDEFW) * (VBF_DEFAULT_HISTORY_SIZE)]; static term_char_t *vt_constextbufrows[VBF_DEFAULT_HISTORY_SIZE]; static struct vt_window vt_conswindow = { .vw_number = VT_CONSWINDOW, .vw_flags = VWF_CONSOLE, .vw_buf = { .vb_buffer = &vt_constextbuf[0], .vb_rows = &vt_constextbufrows[0], .vb_history_size = VBF_DEFAULT_HISTORY_SIZE, .vb_curroffset = 0, .vb_roffset = 0, .vb_flags = VBF_STATIC, .vb_mark_start = {.tp_row = 0, .tp_col = 0,}, .vb_mark_end = {.tp_row = 0, .tp_col = 0,}, .vb_scr_size = { .tp_row = _VTDEFH, .tp_col = _VTDEFW, }, }, .vw_device = &vt_consdev, .vw_terminal = &vt_consterm, .vw_kbdmode = K_XLATE, .vw_grabbed = 0, }; struct terminal vt_consterm = { .tm_class = &vt_termclass, .tm_softc = &vt_conswindow, .tm_flags = TF_CONS, }; static struct consdev vt_consterm_consdev = { .cn_ops = &termcn_cnops, .cn_arg = &vt_consterm, .cn_name = "ttyv0", }; /* Add to set of consoles. 
*/ DATA_SET(cons_set, vt_consterm_consdev); /* * Right after kmem is done to allow early drivers to use locking and allocate * memory. */ SYSINIT(vt_update_static, SI_SUB_KMEM, SI_ORDER_ANY, vt_update_static, &vt_consdev); /* Delay until all devices attached, to not waste time. */ SYSINIT(vt_early_cons, SI_SUB_INT_CONFIG_HOOKS, SI_ORDER_ANY, vt_upgrade, &vt_consdev); /* Initialize locks/mem depended members. */ static void vt_update_static(void *dummy) { if (!vty_enabled(VTY_VT)) return; if (main_vd->vd_driver != NULL) printf("VT(%s): %s %ux%u\n", main_vd->vd_driver->vd_name, (main_vd->vd_flags & VDF_TEXTMODE) ? "text" : "resolution", main_vd->vd_width, main_vd->vd_height); else printf("VT: init without driver.\n"); mtx_init(&main_vd->vd_lock, "vtdev", NULL, MTX_DEF); cv_init(&main_vd->vd_winswitch, "vtwswt"); } static void vt_schedule_flush(struct vt_device *vd, int ms) { if (ms <= 0) /* Default to initial value. */ ms = 1000 / VT_TIMERFREQ; callout_schedule(&vd->vd_timer, hz / (1000 / ms)); } void vt_resume_flush_timer(struct vt_window *vw, int ms) { struct vt_device *vd = vw->vw_device; if (vd->vd_curwindow != vw) return; if (!(vd->vd_flags & VDF_ASYNC) || !atomic_cmpset_int(&vd->vd_timer_armed, 0, 1)) return; vt_schedule_flush(vd, ms); } static void vt_suspend_flush_timer(struct vt_device *vd) { /* * As long as this function is called locked, callout_stop() * has the same effect like callout_drain() with regard to * preventing the callback function from executing. */ VT_LOCK_ASSERT(vd, MA_OWNED); if (!(vd->vd_flags & VDF_ASYNC) || !atomic_cmpset_int(&vd->vd_timer_armed, 1, 0)) return; callout_stop(&vd->vd_timer); } static void vt_switch_timer(void *arg) { - vt_late_window_switch((struct vt_window *)arg); + (void)vt_late_window_switch((struct vt_window *)arg); } static int vt_save_kbd_mode(struct vt_window *vw, keyboard_t *kbd) { int mode, ret; mode = 0; ret = kbdd_ioctl(kbd, KDGKBMODE, (caddr_t)&mode); if (ret == ENOIOCTL) ret = ENODEV; if (ret != 0) return (ret); vw->vw_kbdmode = mode; return (0); } static int vt_update_kbd_mode(struct vt_window *vw, keyboard_t *kbd) { int ret; ret = kbdd_ioctl(kbd, KDSKBMODE, (caddr_t)&vw->vw_kbdmode); if (ret == ENOIOCTL) ret = ENODEV; return (ret); } static int vt_save_kbd_state(struct vt_window *vw, keyboard_t *kbd) { int state, ret; state = 0; ret = kbdd_ioctl(kbd, KDGKBSTATE, (caddr_t)&state); if (ret == ENOIOCTL) ret = ENODEV; if (ret != 0) return (ret); vw->vw_kbdstate &= ~LOCK_MASK; vw->vw_kbdstate |= state & LOCK_MASK; return (0); } static int vt_update_kbd_state(struct vt_window *vw, keyboard_t *kbd) { int state, ret; state = vw->vw_kbdstate & LOCK_MASK; ret = kbdd_ioctl(kbd, KDSKBSTATE, (caddr_t)&state); if (ret == ENOIOCTL) ret = ENODEV; return (ret); } static int vt_save_kbd_leds(struct vt_window *vw, keyboard_t *kbd) { int leds, ret; leds = 0; ret = kbdd_ioctl(kbd, KDGETLED, (caddr_t)&leds); if (ret == ENOIOCTL) ret = ENODEV; if (ret != 0) return (ret); vw->vw_kbdstate &= ~LED_MASK; vw->vw_kbdstate |= leds & LED_MASK; return (0); } static int vt_update_kbd_leds(struct vt_window *vw, keyboard_t *kbd) { int leds, ret; leds = vw->vw_kbdstate & LED_MASK; ret = kbdd_ioctl(kbd, KDSETLED, (caddr_t)&leds); if (ret == ENOIOCTL) ret = ENODEV; return (ret); } static int vt_window_preswitch(struct vt_window *vw, struct vt_window *curvw) { DPRINTF(40, "%s\n", __func__); curvw->vw_switch_to = vw; /* Set timer to allow switch in case when process hang. 
*/ callout_reset(&vw->vw_proc_dead_timer, hz * vt_deadtimer, vt_switch_timer, (void *)vw); /* Notify process about vt switch attempt. */ DPRINTF(30, "%s: Notify process.\n", __func__); signal_vt_rel(curvw); return (0); } static int vt_window_postswitch(struct vt_window *vw) { signal_vt_acq(vw); return (0); } /* vt_late_window_switch will done VT switching for regular case. */ static int vt_late_window_switch(struct vt_window *vw) { + struct vt_window *curvw; int ret; callout_stop(&vw->vw_proc_dead_timer); ret = vt_window_switch(vw); - if (ret) + if (ret != 0) { + /* + * If the switch hasn't happened, then return the VT + * to the current owner, if any. + */ + curvw = vw->vw_device->vd_curwindow; + if (curvw->vw_smode.mode == VT_PROCESS) + (void)vt_window_postswitch(curvw); return (ret); + } /* Notify owner process about terminal availability. */ if (vw->vw_smode.mode == VT_PROCESS) { ret = vt_window_postswitch(vw); } return (ret); } /* Switch window. */ static int vt_proc_window_switch(struct vt_window *vw) { struct vt_window *curvw; struct vt_device *vd; int ret; /* Prevent switching to NULL */ if (vw == NULL) { DPRINTF(30, "%s: Cannot switch: vw is NULL.", __func__); return (EINVAL); } vd = vw->vw_device; curvw = vd->vd_curwindow; /* Check if virtual terminal is locked */ if (curvw->vw_flags & VWF_VTYLOCK) return (EBUSY); /* Check if switch already in progress */ if (curvw->vw_flags & VWF_SWWAIT_REL) { /* Check if switching to same window */ if (curvw->vw_switch_to == vw) { DPRINTF(30, "%s: Switch in progress to same vw.", __func__); return (0); /* success */ } DPRINTF(30, "%s: Switch in progress to different vw.", __func__); return (EBUSY); } /* Avoid switching to already selected window */ if (vw == curvw) { DPRINTF(30, "%s: Cannot switch: vw == curvw.", __func__); return (0); /* success */ } + /* + * Early check for an attempt to switch to a non-functional VT. + * The same check is done in vt_window_switch(), but it's better + * to fail as early as possible to avoid needless pre-switch + * actions. + */ + VT_LOCK(vd); + if ((vw->vw_flags & (VWF_OPENED|VWF_CONSOLE)) == 0) { + VT_UNLOCK(vd); + return (EINVAL); + } + VT_UNLOCK(vd); + /* Ask current process permission to switch away. */ if (curvw->vw_smode.mode == VT_PROCESS) { DPRINTF(30, "%s: VT_PROCESS ", __func__); if (vt_proc_alive(curvw) == FALSE) { DPRINTF(30, "Dead. Cleaning."); /* Dead */ } else { DPRINTF(30, "%s: Signaling process.\n", __func__); /* Alive, try to ask him. */ ret = vt_window_preswitch(vw, curvw); /* Wait for process answer or timeout. */ return (ret); } DPRINTF(30, "\n"); } ret = vt_late_window_switch(vw); return (ret); } /* Switch window ignoring process locking. */ static int vt_window_switch(struct vt_window *vw) { struct vt_device *vd = vw->vw_device; struct vt_window *curvw = vd->vd_curwindow; keyboard_t *kbd; if (kdb_active) { /* * When grabbing the console for the debugger, avoid * locks as that can result in deadlock. While this * could use try locks, that wouldn't really make a * difference as there are sufficient barriers in * debugger entry/exit to be equivalent to * successfully try-locking here. */ if (curvw == vw) return (0); if (!(vw->vw_flags & (VWF_OPENED|VWF_CONSOLE))) return (EINVAL); vd->vd_curwindow = vw; vd->vd_flags |= VDF_INVALID; if (vd->vd_driver->vd_postswitch) vd->vd_driver->vd_postswitch(vd); return (0); } VT_LOCK(vd); if (curvw == vw) { /* Nothing to do. 
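The release/acquire handshake that vt_window_preswitch() and vt_late_window_switch() drive above has a userland counterpart: a process (an X server, typically) puts its vty into VT_PROCESS mode with VT_SETMODE and then answers the kernel's signals through VT_RELDISP. A hedged sketch of that counterpart follows; the signal numbers and device path are illustrative, and only the consio ioctls themselves are taken from <sys/consio.h>.

/*
 * Hypothetical VT_PROCESS client: register release/acquire signals,
 * then acknowledge switches with VT_RELDISP.
 */
#include <sys/types.h>
#include <sys/consio.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static int ttyfd;

static void
on_release(int sig)
{
	/* Kernel sent relsig: agree to give the VT away. */
	(void)sig;
	(void)ioctl(ttyfd, VT_RELDISP, VT_TRUE);	/* VT_FALSE would refuse */
}

static void
on_acquire(int sig)
{
	/* Kernel sent acqsig: we own the VT again. */
	(void)sig;
	(void)ioctl(ttyfd, VT_RELDISP, VT_ACKACQ);
}

int
main(void)
{
	struct vt_mode m = {
		.mode = VT_PROCESS,
		.relsig = SIGUSR1,	/* illustrative signal choices */
		.acqsig = SIGUSR2,
		.frsig = SIGUSR2,
	};

	ttyfd = open("/dev/ttyv1", O_RDWR);	/* illustrative vty */
	signal(SIGUSR1, on_release);
	signal(SIGUSR2, on_acquire);
	(void)ioctl(ttyfd, VT_SETMODE, &m);
	for (;;)
		pause();
}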
*/ VT_UNLOCK(vd); return (0); } if (!(vw->vw_flags & (VWF_OPENED|VWF_CONSOLE))) { VT_UNLOCK(vd); return (EINVAL); } vt_suspend_flush_timer(vd); vd->vd_curwindow = vw; vd->vd_flags |= VDF_INVALID; cv_broadcast(&vd->vd_winswitch); VT_UNLOCK(vd); if (vd->vd_driver->vd_postswitch) vd->vd_driver->vd_postswitch(vd); vt_resume_flush_timer(vw, 0); /* Restore per-window keyboard mode. */ mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) { if (curvw->vw_kbdmode == K_XLATE) vt_save_kbd_state(curvw, kbd); vt_update_kbd_mode(vw, kbd); vt_update_kbd_state(vw, kbd); } mtx_unlock(&Giant); DPRINTF(10, "%s(ttyv%d) done\n", __func__, vw->vw_number); return (0); } void vt_termsize(struct vt_device *vd, struct vt_font *vf, term_pos_t *size) { size->tp_row = vd->vd_height; if (vt_draw_logo_cpus) size->tp_row -= vt_logo_sprite_height; size->tp_col = vd->vd_width; if (vf != NULL) { size->tp_row /= vf->vf_height; size->tp_col /= vf->vf_width; } } static inline void vt_termrect(struct vt_device *vd, struct vt_font *vf, term_rect_t *rect) { rect->tr_begin.tp_row = rect->tr_begin.tp_col = 0; if (vt_draw_logo_cpus) rect->tr_begin.tp_row = vt_logo_sprite_height; rect->tr_end.tp_row = vd->vd_height; rect->tr_end.tp_col = vd->vd_width; if (vf != NULL) { rect->tr_begin.tp_row = howmany(rect->tr_begin.tp_row, vf->vf_height); rect->tr_end.tp_row /= vf->vf_height; rect->tr_end.tp_col /= vf->vf_width; } } void vt_winsize(struct vt_device *vd, struct vt_font *vf, struct winsize *size) { size->ws_ypixel = vd->vd_height; if (vt_draw_logo_cpus) size->ws_ypixel -= vt_logo_sprite_height; size->ws_row = size->ws_ypixel; size->ws_col = size->ws_xpixel = vd->vd_width; if (vf != NULL) { size->ws_row /= vf->vf_height; size->ws_col /= vf->vf_width; } } void vt_compute_drawable_area(struct vt_window *vw) { struct vt_device *vd; struct vt_font *vf; vt_axis_t height; vd = vw->vw_device; if (vw->vw_font == NULL) { vw->vw_draw_area.tr_begin.tp_col = 0; vw->vw_draw_area.tr_begin.tp_row = 0; if (vt_draw_logo_cpus) vw->vw_draw_area.tr_begin.tp_row = vt_logo_sprite_height; vw->vw_draw_area.tr_end.tp_col = vd->vd_width; vw->vw_draw_area.tr_end.tp_row = vd->vd_height; return; } vf = vw->vw_font; /* * Compute the drawable area, so that the text is centered on * the screen. */ height = vd->vd_height; if (vt_draw_logo_cpus) height -= vt_logo_sprite_height; vw->vw_draw_area.tr_begin.tp_col = (vd->vd_width % vf->vf_width) / 2; vw->vw_draw_area.tr_begin.tp_row = (height % vf->vf_height) / 2; if (vt_draw_logo_cpus) vw->vw_draw_area.tr_begin.tp_row += vt_logo_sprite_height; vw->vw_draw_area.tr_end.tp_col = vw->vw_draw_area.tr_begin.tp_col + rounddown(vd->vd_width, vf->vf_width); vw->vw_draw_area.tr_end.tp_row = vw->vw_draw_area.tr_begin.tp_row + rounddown(height, vf->vf_height); } static void vt_scroll(struct vt_window *vw, int offset, int whence) { int diff; term_pos_t size; if ((vw->vw_flags & VWF_SCROLL) == 0) return; vt_termsize(vw->vw_device, vw->vw_font, &size); diff = vthistory_seek(&vw->vw_buf, offset, whence); if (diff) vw->vw_device->vd_flags |= VDF_INVALID; vt_resume_flush_timer(vw, 0); } static int vt_machine_kbdevent(int c) { switch (c) { case SPCLKEY | DBG: /* kbdmap(5) keyword `debug`. */ if (vt_kbd_debug) kdb_enter(KDB_WHY_BREAK, "manual escape to debugger"); return (1); case SPCLKEY | HALT: /* kbdmap(5) keyword `halt`. */ if (vt_kbd_halt) shutdown_nice(RB_HALT); return (1); case SPCLKEY | PASTE: /* kbdmap(5) keyword `paste`. */ #ifndef SC_NO_CUTPASTE /* Insert text from cut-paste buffer. 
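Returning to the geometry helpers defined earlier in this file: vt_termsize() and vt_winsize() simply divide the pixel dimensions by the font cell, after reserving the logo strip when vt_draw_logo_cpus is set. As a worked example under assumed numbers (a 1024x768 framebuffer and an 8x16 font), that yields 1024 / 8 = 128 columns by 768 / 16 = 48 rows:

/* Illustrative only: the column/row arithmetic vt_termsize() performs. */
#include <stdio.h>

int
main(void)
{
	unsigned int width = 1024, height = 768;	/* assumed framebuffer */
	unsigned int vf_width = 8, vf_height = 16;	/* assumed font cell */

	printf("%u cols x %u rows\n", width / vf_width, height / vf_height);
	return (0);
}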
*/ vt_mouse_paste(); #endif break; case SPCLKEY | PDWN: /* kbdmap(5) keyword `pdwn`. */ if (vt_kbd_poweroff) shutdown_nice(RB_HALT|RB_POWEROFF); return (1); case SPCLKEY | PNC: /* kbdmap(5) keyword `panic`. */ /* * Request to immediate panic if sysctl * kern.vt.enable_panic_key allow it. */ if (vt_kbd_panic) panic("Forced by the panic key"); return (1); case SPCLKEY | RBT: /* kbdmap(5) keyword `boot`. */ if (vt_kbd_reboot) shutdown_nice(RB_AUTOBOOT); return (1); case SPCLKEY | SPSC: /* kbdmap(5) keyword `spsc`. */ /* Force activatation/deactivation of the screen saver. */ /* TODO */ return (1); case SPCLKEY | STBY: /* XXX Not present in kbdcontrol parser. */ /* Put machine into Stand-By mode. */ power_pm_suspend(POWER_SLEEP_STATE_STANDBY); return (1); case SPCLKEY | SUSP: /* kbdmap(5) keyword `susp`. */ /* Suspend machine. */ power_pm_suspend(POWER_SLEEP_STATE_SUSPEND); return (1); } return (0); } static void vt_scrollmode_kbdevent(struct vt_window *vw, int c, int console) { struct vt_device *vd; term_pos_t size; vd = vw->vw_device; /* Only special keys handled in ScrollLock mode */ if ((c & SPCLKEY) == 0) return; c &= ~SPCLKEY; if (console == 0) { if (c >= F_SCR && c <= MIN(L_SCR, F_SCR + VT_MAXWINDOWS - 1)) { vw = vd->vd_windows[c - F_SCR]; vt_proc_window_switch(vw); return; } VT_LOCK(vd); } switch (c) { case SLK: { /* Turn scrolling off. */ vt_scroll(vw, 0, VHS_END); VTBUF_SLCK_DISABLE(&vw->vw_buf); vw->vw_flags &= ~VWF_SCROLL; break; } case FKEY | F(49): /* Home key. */ vt_scroll(vw, 0, VHS_SET); break; case FKEY | F(50): /* Arrow up. */ vt_scroll(vw, -1, VHS_CUR); break; case FKEY | F(51): /* Page up. */ vt_termsize(vd, vw->vw_font, &size); vt_scroll(vw, -size.tp_row, VHS_CUR); break; case FKEY | F(57): /* End key. */ vt_scroll(vw, 0, VHS_END); break; case FKEY | F(58): /* Arrow down. */ vt_scroll(vw, 1, VHS_CUR); break; case FKEY | F(59): /* Page down. */ vt_termsize(vd, vw->vw_font, &size); vt_scroll(vw, size.tp_row, VHS_CUR); break; } if (console == 0) VT_UNLOCK(vd); } static int vt_processkey(keyboard_t *kbd, struct vt_device *vd, int c) { struct vt_window *vw = vd->vd_curwindow; random_harvest_queue(&c, sizeof(c), RANDOM_KEYBOARD); #if VT_ALT_TO_ESC_HACK if (c & RELKEY) { switch (c & ~RELKEY) { case (SPCLKEY | RALT): if (vt_enable_altgr != 0) break; case (SPCLKEY | LALT): vd->vd_kbstate &= ~ALKED; } /* Other keys ignored for RELKEY event. */ return (0); } else { switch (c & ~RELKEY) { case (SPCLKEY | RALT): if (vt_enable_altgr != 0) break; case (SPCLKEY | LALT): vd->vd_kbstate |= ALKED; } } #else if (c & RELKEY) /* Other keys ignored for RELKEY event. */ return (0); #endif if (vt_machine_kbdevent(c)) return (0); if (vw->vw_flags & VWF_SCROLL) { vt_scrollmode_kbdevent(vw, c, 0/* Not a console */); /* Scroll mode keys handled, nothing to do more. */ return (0); } if (c & SPCLKEY) { c &= ~SPCLKEY; if (c >= F_SCR && c <= MIN(L_SCR, F_SCR + VT_MAXWINDOWS - 1)) { vw = vd->vd_windows[c - F_SCR]; vt_proc_window_switch(vw); return (0); } switch (c) { case NEXT: /* Switch to next VT. */ c = (vw->vw_number + 1) % VT_MAXWINDOWS; vw = vd->vd_windows[c]; vt_proc_window_switch(vw); return (0); case PREV: /* Switch to previous VT. */ c = (vw->vw_number + VT_MAXWINDOWS - 1) % VT_MAXWINDOWS; vw = vd->vd_windows[c]; vt_proc_window_switch(vw); return (0); case SLK: { vt_save_kbd_state(vw, kbd); VT_LOCK(vd); if (vw->vw_kbdstate & SLKED) { /* Turn scrolling on. */ vw->vw_flags |= VWF_SCROLL; VTBUF_SLCK_ENABLE(&vw->vw_buf); } else { /* Turn scrolling off. 
*/ vw->vw_flags &= ~VWF_SCROLL; VTBUF_SLCK_DISABLE(&vw->vw_buf); vt_scroll(vw, 0, VHS_END); } VT_UNLOCK(vd); break; } case FKEY | F(1): case FKEY | F(2): case FKEY | F(3): case FKEY | F(4): case FKEY | F(5): case FKEY | F(6): case FKEY | F(7): case FKEY | F(8): case FKEY | F(9): case FKEY | F(10): case FKEY | F(11): case FKEY | F(12): /* F1 through F12 keys. */ terminal_input_special(vw->vw_terminal, TKEY_F1 + c - (FKEY | F(1))); break; case FKEY | F(49): /* Home key. */ terminal_input_special(vw->vw_terminal, TKEY_HOME); break; case FKEY | F(50): /* Arrow up. */ terminal_input_special(vw->vw_terminal, TKEY_UP); break; case FKEY | F(51): /* Page up. */ terminal_input_special(vw->vw_terminal, TKEY_PAGE_UP); break; case FKEY | F(53): /* Arrow left. */ terminal_input_special(vw->vw_terminal, TKEY_LEFT); break; case FKEY | F(55): /* Arrow right. */ terminal_input_special(vw->vw_terminal, TKEY_RIGHT); break; case FKEY | F(57): /* End key. */ terminal_input_special(vw->vw_terminal, TKEY_END); break; case FKEY | F(58): /* Arrow down. */ terminal_input_special(vw->vw_terminal, TKEY_DOWN); break; case FKEY | F(59): /* Page down. */ terminal_input_special(vw->vw_terminal, TKEY_PAGE_DOWN); break; case FKEY | F(60): /* Insert key. */ terminal_input_special(vw->vw_terminal, TKEY_INSERT); break; case FKEY | F(61): /* Delete key. */ terminal_input_special(vw->vw_terminal, TKEY_DELETE); break; } } else if (KEYFLAGS(c) == 0) { /* Don't do UTF-8 conversion when doing raw mode. */ if (vw->vw_kbdmode == K_XLATE) { #if VT_ALT_TO_ESC_HACK if (vd->vd_kbstate & ALKED) { /* * Prepend ESC sequence if one of ALT keys down. */ terminal_input_char(vw->vw_terminal, 0x1b); } #endif #if defined(KDB) kdb_alt_break(c, &vd->vd_altbrk); #endif terminal_input_char(vw->vw_terminal, KEYCHAR(c)); } else terminal_input_raw(vw->vw_terminal, c); } return (0); } static int vt_kbdevent(keyboard_t *kbd, int event, void *arg) { struct vt_device *vd = arg; int c; switch (event) { case KBDIO_KEYINPUT: break; case KBDIO_UNLOADING: mtx_lock(&Giant); vd->vd_keyboard = -1; kbd_release(kbd, (void *)vd); mtx_unlock(&Giant); return (0); default: return (EINVAL); } while ((c = kbdd_read_char(kbd, 0)) != NOKEY) vt_processkey(kbd, vd, c); return (0); } static int vt_allocate_keyboard(struct vt_device *vd) { int grabbed, i, idx0, idx; keyboard_t *k0, *k; keyboard_info_t ki; /* * If vt_upgrade() happens while the console is grabbed, we are * potentially going to switch keyboard devices while the keyboard is in * use. Unwind the grabbing of the current keyboard first, then we will * re-grab the new keyboard below, before we return. 
*/ if (vd->vd_curwindow == &vt_conswindow) { grabbed = vd->vd_curwindow->vw_grabbed; for (i = 0; i < grabbed; ++i) vtterm_cnungrab(vd->vd_curwindow->vw_terminal); } idx0 = kbd_allocate("kbdmux", -1, vd, vt_kbdevent, vd); if (idx0 >= 0) { DPRINTF(20, "%s: kbdmux allocated, idx = %d\n", __func__, idx0); k0 = kbd_get_keyboard(idx0); for (idx = kbd_find_keyboard2("*", -1, 0); idx != -1; idx = kbd_find_keyboard2("*", -1, idx + 1)) { k = kbd_get_keyboard(idx); if (idx == idx0 || KBD_IS_BUSY(k)) continue; bzero(&ki, sizeof(ki)); strncpy(ki.kb_name, k->kb_name, sizeof(ki.kb_name)); ki.kb_name[sizeof(ki.kb_name) - 1] = '\0'; ki.kb_unit = k->kb_unit; kbdd_ioctl(k0, KBADDKBD, (caddr_t) &ki); } } else { DPRINTF(20, "%s: no kbdmux allocated\n", __func__); idx0 = kbd_allocate("*", -1, vd, vt_kbdevent, vd); if (idx0 < 0) { DPRINTF(10, "%s: No keyboard found.\n", __func__); return (-1); } } vd->vd_keyboard = idx0; DPRINTF(20, "%s: vd_keyboard = %d\n", __func__, vd->vd_keyboard); if (vd->vd_curwindow == &vt_conswindow) { for (i = 0; i < grabbed; ++i) vtterm_cngrab(vd->vd_curwindow->vw_terminal); } return (idx0); } static void vtterm_bell(struct terminal *tm) { struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; if (!vt_enable_bell) return; if (vd->vd_flags & VDF_QUIET_BELL) return; sysbeep(1193182 / VT_BELLPITCH, VT_BELLDURATION); } static void vtterm_beep(struct terminal *tm, u_int param) { u_int freq, period; if (!vt_enable_bell) return; if ((param == 0) || ((param & 0xffff) == 0)) { vtterm_bell(tm); return; } period = ((param >> 16) & 0xffff) * hz / 1000; freq = 1193182 / (param & 0xffff); sysbeep(freq, period); } static void vtterm_cursor(struct terminal *tm, const term_pos_t *p) { struct vt_window *vw = tm->tm_softc; vtbuf_cursor_position(&vw->vw_buf, p); } static void vtterm_putchar(struct terminal *tm, const term_pos_t *p, term_char_t c) { struct vt_window *vw = tm->tm_softc; vtbuf_putchar(&vw->vw_buf, p, c); } static void vtterm_fill(struct terminal *tm, const term_rect_t *r, term_char_t c) { struct vt_window *vw = tm->tm_softc; vtbuf_fill(&vw->vw_buf, r, c); } static void vtterm_copy(struct terminal *tm, const term_rect_t *r, const term_pos_t *p) { struct vt_window *vw = tm->tm_softc; vtbuf_copy(&vw->vw_buf, r, p); } static void vtterm_param(struct terminal *tm, int cmd, unsigned int arg) { struct vt_window *vw = tm->tm_softc; switch (cmd) { case TP_SETLOCALCURSOR: /* * 0 means normal (usually block), 1 means hidden, and * 2 means blinking (always block) for compatibility with * syscons. We don't support any changes except hiding, * so must map 2 to 0. */ arg = (arg == 1) ? 0 : 1; /* FALLTHROUGH */ case TP_SHOWCURSOR: vtbuf_cursor_visibility(&vw->vw_buf, arg); vt_resume_flush_timer(vw, 0); break; case TP_MOUSE: vw->vw_mouse_level = arg; break; } } void vt_determine_colors(term_char_t c, int cursor, term_color_t *fg, term_color_t *bg) { term_color_t tmp; int invert; invert = 0; *fg = TCHAR_FGCOLOR(c); if (TCHAR_FORMAT(c) & TF_BOLD) *fg = TCOLOR_LIGHT(*fg); *bg = TCHAR_BGCOLOR(c); if (TCHAR_FORMAT(c) & TF_BLINK) *bg = TCOLOR_LIGHT(*bg); if (TCHAR_FORMAT(c) & TF_REVERSE) invert ^= 1; if (cursor) invert ^= 1; if (invert) { tmp = *fg; *fg = *bg; *bg = tmp; } } #ifndef SC_NO_CUTPASTE int vt_is_cursor_in_area(const struct vt_device *vd, const term_rect_t *area) { unsigned int mx, my; /* * We use the cursor position saved during the current refresh, * in case the cursor moved since. 
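vtterm_beep() above decodes its parameter the KDMKTONE way: the low 16 bits are a divisor of the 1.193182 MHz timer clock and the high 16 bits are a duration in milliseconds, converted to ticks using hz. A small worked sketch with an illustrative parameter value:

/* Illustrative only: decoding a beep parameter as vtterm_beep() does. */
#include <stdio.h>

int
main(void)
{
	unsigned int param = (500u << 16) | 1193u;	/* hypothetical value */
	unsigned int hz = 1000;				/* assumed kern.hz */
	unsigned int freq, period;

	freq = 1193182 / (param & 0xffff);		/* ~1000 Hz tone */
	period = ((param >> 16) & 0xffff) * hz / 1000;	/* 500 ticks, 500 ms at hz=1000 */
	printf("%u Hz for %u ticks\n", freq, period);
	return (0);
}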
*/ mx = vd->vd_mx_drawn + vd->vd_curwindow->vw_draw_area.tr_begin.tp_col; my = vd->vd_my_drawn + vd->vd_curwindow->vw_draw_area.tr_begin.tp_row; if (mx >= area->tr_end.tp_col || mx + vd->vd_mcursor->width <= area->tr_begin.tp_col || my >= area->tr_end.tp_row || my + vd->vd_mcursor->height <= area->tr_begin.tp_row) return (0); return (1); } static void vt_mark_mouse_position_as_dirty(struct vt_device *vd, int locked) { term_rect_t area; struct vt_window *vw; struct vt_font *vf; int x, y; vw = vd->vd_curwindow; vf = vw->vw_font; x = vd->vd_mx_drawn; y = vd->vd_my_drawn; if (vf != NULL) { area.tr_begin.tp_col = x / vf->vf_width; area.tr_begin.tp_row = y / vf->vf_height; area.tr_end.tp_col = ((x + vd->vd_mcursor->width) / vf->vf_width) + 1; area.tr_end.tp_row = ((y + vd->vd_mcursor->height) / vf->vf_height) + 1; } else { /* * No font loaded (ie. vt_vga operating in textmode). * * FIXME: This fake area needs to be revisited once the * mouse cursor is supported in vt_vga's textmode. */ area.tr_begin.tp_col = x; area.tr_begin.tp_row = y; area.tr_end.tp_col = x + 2; area.tr_end.tp_row = y + 2; } if (!locked) vtbuf_lock(&vw->vw_buf); if (vd->vd_driver->vd_invalidate_text) vd->vd_driver->vd_invalidate_text(vd, &area); vtbuf_dirty(&vw->vw_buf, &area); if (!locked) vtbuf_unlock(&vw->vw_buf); } #endif static void vt_set_border(struct vt_device *vd, const term_rect_t *area, const term_color_t c) { vd_drawrect_t *drawrect = vd->vd_driver->vd_drawrect; if (drawrect == NULL) return; /* Top bar */ if (area->tr_begin.tp_row > 0) drawrect(vd, 0, 0, vd->vd_width - 1, area->tr_begin.tp_row - 1, 1, c); /* Left bar */ if (area->tr_begin.tp_col > 0) drawrect(vd, 0, area->tr_begin.tp_row, area->tr_begin.tp_col - 1, area->tr_end.tp_row - 1, 1, c); /* Right bar */ if (area->tr_end.tp_col < vd->vd_width) drawrect(vd, area->tr_end.tp_col, area->tr_begin.tp_row, vd->vd_width - 1, area->tr_end.tp_row - 1, 1, c); /* Bottom bar */ if (area->tr_end.tp_row < vd->vd_height) drawrect(vd, 0, area->tr_end.tp_row, vd->vd_width - 1, vd->vd_height - 1, 1, c); } static int vt_flush(struct vt_device *vd) { struct vt_window *vw; struct vt_font *vf; term_rect_t tarea; #ifndef SC_NO_CUTPASTE int cursor_was_shown, cursor_moved; #endif vw = vd->vd_curwindow; if (vw == NULL) return (0); if (vd->vd_flags & VDF_SPLASH || vw->vw_flags & VWF_BUSY) return (0); vf = vw->vw_font; if (((vd->vd_flags & VDF_TEXTMODE) == 0) && (vf == NULL)) return (0); vtbuf_lock(&vw->vw_buf); #ifndef SC_NO_CUTPASTE cursor_was_shown = vd->vd_mshown; cursor_moved = (vd->vd_mx != vd->vd_mx_drawn || vd->vd_my != vd->vd_my_drawn); /* Check if the cursor should be displayed or not. */ if ((vd->vd_flags & VDF_MOUSECURSOR) && /* Mouse support enabled. */ !(vw->vw_flags & VWF_MOUSE_HIDE) && /* Cursor displayed. */ !kdb_active && panicstr == NULL) { /* DDB inactive. */ vd->vd_mshown = 1; } else { vd->vd_mshown = 0; } /* * If the cursor changed display state or moved, we must mark * the old position as dirty, so that it's erased. */ if (cursor_was_shown != vd->vd_mshown || (vd->vd_mshown && cursor_moved)) vt_mark_mouse_position_as_dirty(vd, true); /* * Save position of the mouse cursor. It's used by backends to * know where to draw the cursor and during the next refresh to * erase the previous position. */ vd->vd_mx_drawn = vd->vd_mx; vd->vd_my_drawn = vd->vd_my; /* * If the cursor is displayed and has moved since last refresh, * mark the new position as dirty. 
*/ if (vd->vd_mshown && cursor_moved) vt_mark_mouse_position_as_dirty(vd, true); #endif vtbuf_undirty(&vw->vw_buf, &tarea); /* Force a full redraw when the screen contents might be invalid. */ if (vd->vd_flags & (VDF_INVALID | VDF_SUSPENDED)) { vd->vd_flags &= ~VDF_INVALID; vt_set_border(vd, &vw->vw_draw_area, TC_BLACK); vt_termrect(vd, vf, &tarea); if (vd->vd_driver->vd_invalidate_text) vd->vd_driver->vd_invalidate_text(vd, &tarea); if (vt_draw_logo_cpus) vtterm_draw_cpu_logos(vd); } if (tarea.tr_begin.tp_col < tarea.tr_end.tp_col) { vd->vd_driver->vd_bitblt_text(vd, vw, &tarea); vtbuf_unlock(&vw->vw_buf); return (1); } vtbuf_unlock(&vw->vw_buf); return (0); } static void vt_timer(void *arg) { struct vt_device *vd; int changed; vd = arg; /* Update screen if required. */ changed = vt_flush(vd); /* Schedule for next update. */ if (changed) vt_schedule_flush(vd, 0); else vd->vd_timer_armed = 0; } static void vtterm_pre_input(struct terminal *tm) { struct vt_window *vw = tm->tm_softc; vtbuf_lock(&vw->vw_buf); } static void vtterm_post_input(struct terminal *tm) { struct vt_window *vw = tm->tm_softc; vtbuf_unlock(&vw->vw_buf); vt_resume_flush_timer(vw, 0); } static void vtterm_done(struct terminal *tm) { struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; if (kdb_active || panicstr != NULL) { /* Switch to the debugger. */ if (vd->vd_curwindow != vw) { vd->vd_curwindow = vw; vd->vd_flags |= VDF_INVALID; if (vd->vd_driver->vd_postswitch) vd->vd_driver->vd_postswitch(vd); } vd->vd_flags &= ~VDF_SPLASH; vt_flush(vd); } else if (!(vd->vd_flags & VDF_ASYNC)) { vt_flush(vd); } } #ifdef DEV_SPLASH static void vtterm_splash(struct vt_device *vd) { vt_axis_t top, left; /* Display a nice boot splash. */ if (!(vd->vd_flags & VDF_TEXTMODE) && (boothowto & RB_MUTE)) { top = (vd->vd_height - vt_logo_height) / 2; left = (vd->vd_width - vt_logo_width) / 2; switch (vt_logo_depth) { case 1: /* XXX: Unhardcode colors! */ vd->vd_driver->vd_bitblt_bmp(vd, vd->vd_curwindow, vt_logo_image, NULL, vt_logo_width, vt_logo_height, left, top, TC_WHITE, TC_BLACK); } vd->vd_flags |= VDF_SPLASH; } } #endif static void vtterm_cnprobe(struct terminal *tm, struct consdev *cp) { struct vt_driver *vtd, **vtdlist, *vtdbest = NULL; struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; struct winsize wsz; term_attr_t attr; term_char_t c; if (!vty_enabled(VTY_VT)) return; if (vd->vd_flags & VDF_INITIALIZED) /* Initialization already done. */ return; SET_FOREACH(vtdlist, vt_drv_set) { vtd = *vtdlist; if (vtd->vd_probe == NULL) continue; if (vtd->vd_probe(vd) == CN_DEAD) continue; if ((vtdbest == NULL) || (vtd->vd_priority > vtdbest->vd_priority)) vtdbest = vtd; } if (vtdbest == NULL) { cp->cn_pri = CN_DEAD; vd->vd_flags |= VDF_DEAD; } else { vd->vd_driver = vtdbest; cp->cn_pri = vd->vd_driver->vd_init(vd); } /* Check if driver's vt_init return CN_DEAD. */ if (cp->cn_pri == CN_DEAD) { vd->vd_flags |= VDF_DEAD; } /* Initialize any early-boot keyboard drivers */ kbd_configure(KB_CONF_PROBE_ONLY); vd->vd_unit = atomic_fetchadd_int(&vt_unit, 1); vd->vd_windows[VT_CONSWINDOW] = vw; sprintf(cp->cn_name, "ttyv%r", VT_UNIT(vw)); /* Attach default font if not in TEXTMODE. */ if ((vd->vd_flags & VDF_TEXTMODE) == 0) { vw->vw_font = vtfont_ref(&vt_font_default); vt_compute_drawable_area(vw); } /* * The original screen size was faked (_VTDEFW x _VTDEFH). Now * that we have the real viewable size, fix it in the static * buffer. 
*/ if (vd->vd_width != 0 && vd->vd_height != 0) vt_termsize(vd, vw->vw_font, &vw->vw_buf.vb_scr_size); vtbuf_init_early(&vw->vw_buf); vt_winsize(vd, vw->vw_font, &wsz); c = (boothowto & RB_MUTE) == 0 ? TERMINAL_KERN_ATTR : TERMINAL_NORM_ATTR; attr.ta_format = TCHAR_FORMAT(c); attr.ta_fgcolor = TCHAR_FGCOLOR(c); attr.ta_bgcolor = TCHAR_BGCOLOR(c); terminal_set_winsize_blank(tm, &wsz, 1, &attr); if (vtdbest != NULL) { #ifdef DEV_SPLASH if (!vt_splash_cpu) vtterm_splash(vd); #endif vd->vd_flags |= VDF_INITIALIZED; } } static int vtterm_cngetc(struct terminal *tm) { struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; keyboard_t *kbd; u_int c; if (vw->vw_kbdsq && *vw->vw_kbdsq) return (*vw->vw_kbdsq++); /* Make sure the splash screen is not there. */ if (vd->vd_flags & VDF_SPLASH) { /* Remove splash */ vd->vd_flags &= ~VDF_SPLASH; /* Mark screen as invalid to force update */ vd->vd_flags |= VDF_INVALID; vt_flush(vd); } /* Stripped down keyboard handler. */ kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd == NULL) return (-1); /* Force keyboard input mode to K_XLATE */ vw->vw_kbdmode = K_XLATE; vt_update_kbd_mode(vw, kbd); /* Switch the keyboard to polling to make it work here. */ kbdd_poll(kbd, TRUE); c = kbdd_read_char(kbd, 0); kbdd_poll(kbd, FALSE); if (c & RELKEY) return (-1); if (vw->vw_flags & VWF_SCROLL) { vt_scrollmode_kbdevent(vw, c, 1/* Console mode */); vt_flush(vd); return (-1); } /* Stripped down handling of vt_kbdevent(), without locking, etc. */ if (c & SPCLKEY) { switch (c) { case SPCLKEY | SLK: vt_save_kbd_state(vw, kbd); if (vw->vw_kbdstate & SLKED) { /* Turn scrolling on. */ vw->vw_flags |= VWF_SCROLL; VTBUF_SLCK_ENABLE(&vw->vw_buf); } else { /* Turn scrolling off. */ vt_scroll(vw, 0, VHS_END); vw->vw_flags &= ~VWF_SCROLL; VTBUF_SLCK_DISABLE(&vw->vw_buf); } break; /* XXX: KDB can handle history. */ case SPCLKEY | FKEY | F(50): /* Arrow up. */ vw->vw_kbdsq = "\x1b[A"; break; case SPCLKEY | FKEY | F(58): /* Arrow down. */ vw->vw_kbdsq = "\x1b[B"; break; case SPCLKEY | FKEY | F(55): /* Arrow right. */ vw->vw_kbdsq = "\x1b[C"; break; case SPCLKEY | FKEY | F(53): /* Arrow left. */ vw->vw_kbdsq = "\x1b[D"; break; } /* Force refresh to make scrollback work. */ vt_flush(vd); } else if (KEYFLAGS(c) == 0) { return (KEYCHAR(c)); } if (vw->vw_kbdsq && *vw->vw_kbdsq) return (*vw->vw_kbdsq++); return (-1); } static void vtterm_cngrab(struct terminal *tm) { struct vt_device *vd; struct vt_window *vw; keyboard_t *kbd; vw = tm->tm_softc; vd = vw->vw_device; if (!cold) vt_window_switch(vw); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd == NULL) return; if (vw->vw_grabbed++ > 0) return; /* * Make sure the keyboard is accessible even when the kbd device * driver is disabled. */ kbdd_enable(kbd); /* We shall always use the keyboard in the XLATE mode here. 
*/ vw->vw_prev_kbdmode = vw->vw_kbdmode; vw->vw_kbdmode = K_XLATE; vt_update_kbd_mode(vw, kbd); kbdd_poll(kbd, TRUE); } static void vtterm_cnungrab(struct terminal *tm) { struct vt_device *vd; struct vt_window *vw; keyboard_t *kbd; vw = tm->tm_softc; vd = vw->vw_device; kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd == NULL) return; if (--vw->vw_grabbed > 0) return; kbdd_poll(kbd, FALSE); vw->vw_kbdmode = vw->vw_prev_kbdmode; vt_update_kbd_mode(vw, kbd); kbdd_disable(kbd); } static void vtterm_opened(struct terminal *tm, int opened) { struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; VT_LOCK(vd); vd->vd_flags &= ~VDF_SPLASH; if (opened) vw->vw_flags |= VWF_OPENED; else { vw->vw_flags &= ~VWF_OPENED; /* TODO: finish ACQ/REL */ } VT_UNLOCK(vd); } static int vt_change_font(struct vt_window *vw, struct vt_font *vf) { struct vt_device *vd = vw->vw_device; struct terminal *tm = vw->vw_terminal; term_pos_t size; struct winsize wsz; /* * Changing fonts. * * Changing fonts is a little tricky. We must prevent * simultaneous access to the device, so we must stop * the display timer and the terminal from accessing. * We need to switch fonts and grow our screen buffer. * * XXX: Right now the code uses terminal_mute() to * prevent data from reaching the console driver while * resizing the screen buffer. This isn't elegant... */ VT_LOCK(vd); if (vw->vw_flags & VWF_BUSY) { /* Another process is changing the font. */ VT_UNLOCK(vd); return (EBUSY); } vw->vw_flags |= VWF_BUSY; VT_UNLOCK(vd); vt_termsize(vd, vf, &size); vt_winsize(vd, vf, &wsz); /* Grow the screen buffer and terminal. */ terminal_mute(tm, 1); vtbuf_grow(&vw->vw_buf, &size, vw->vw_buf.vb_history_size); terminal_set_winsize_blank(tm, &wsz, 0, NULL); terminal_set_cursor(tm, &vw->vw_buf.vb_cursor); terminal_mute(tm, 0); /* Actually apply the font to the current window. */ VT_LOCK(vd); if (vw->vw_font != vf && vw->vw_font != NULL && vf != NULL) { /* * In case vt_change_font called to update size we don't need * to update font link. */ vtfont_unref(vw->vw_font); vw->vw_font = vtfont_ref(vf); } /* * Compute the drawable area and move the mouse cursor inside * it, in case the new area is smaller than the previous one. */ vt_compute_drawable_area(vw); vd->vd_mx = min(vd->vd_mx, vw->vw_draw_area.tr_end.tp_col - vw->vw_draw_area.tr_begin.tp_col - 1); vd->vd_my = min(vd->vd_my, vw->vw_draw_area.tr_end.tp_row - vw->vw_draw_area.tr_begin.tp_row - 1); /* Force a full redraw the next timer tick. 
*/ if (vd->vd_curwindow == vw) { vd->vd_flags |= VDF_INVALID; vt_resume_flush_timer(vw, 0); } vw->vw_flags &= ~VWF_BUSY; VT_UNLOCK(vd); return (0); } static int vt_proc_alive(struct vt_window *vw) { struct proc *p; if (vw->vw_smode.mode != VT_PROCESS) return (FALSE); if (vw->vw_proc) { if ((p = pfind(vw->vw_pid)) != NULL) PROC_UNLOCK(p); if (vw->vw_proc == p) return (TRUE); vw->vw_proc = NULL; vw->vw_smode.mode = VT_AUTO; DPRINTF(1, "vt controlling process %d died\n", vw->vw_pid); vw->vw_pid = 0; } return (FALSE); } static int signal_vt_rel(struct vt_window *vw) { if (vw->vw_smode.mode != VT_PROCESS) return (FALSE); if (vw->vw_proc == NULL || vt_proc_alive(vw) == FALSE) { vw->vw_proc = NULL; vw->vw_pid = 0; return (TRUE); } vw->vw_flags |= VWF_SWWAIT_REL; PROC_LOCK(vw->vw_proc); kern_psignal(vw->vw_proc, vw->vw_smode.relsig); PROC_UNLOCK(vw->vw_proc); DPRINTF(1, "sending relsig to %d\n", vw->vw_pid); return (TRUE); } static int signal_vt_acq(struct vt_window *vw) { if (vw->vw_smode.mode != VT_PROCESS) return (FALSE); if (vw == vw->vw_device->vd_windows[VT_CONSWINDOW]) cnavailable(vw->vw_terminal->consdev, FALSE); if (vw->vw_proc == NULL || vt_proc_alive(vw) == FALSE) { vw->vw_proc = NULL; vw->vw_pid = 0; return (TRUE); } vw->vw_flags |= VWF_SWWAIT_ACQ; PROC_LOCK(vw->vw_proc); kern_psignal(vw->vw_proc, vw->vw_smode.acqsig); PROC_UNLOCK(vw->vw_proc); DPRINTF(1, "sending acqsig to %d\n", vw->vw_pid); return (TRUE); } static int finish_vt_rel(struct vt_window *vw, int release, int *s) { if (vw->vw_flags & VWF_SWWAIT_REL) { vw->vw_flags &= ~VWF_SWWAIT_REL; if (release) { callout_drain(&vw->vw_proc_dead_timer); - vt_late_window_switch(vw->vw_switch_to); + (void)vt_late_window_switch(vw->vw_switch_to); } return (0); } return (EINVAL); } static int finish_vt_acq(struct vt_window *vw) { if (vw->vw_flags & VWF_SWWAIT_ACQ) { vw->vw_flags &= ~VWF_SWWAIT_ACQ; return (0); } return (EINVAL); } #ifndef SC_NO_CUTPASTE static void vt_mouse_terminput_button(struct vt_device *vd, int button) { struct vt_window *vw; struct vt_font *vf; char mouseb[6] = "\x1B[M"; int i, x, y; vw = vd->vd_curwindow; vf = vw->vw_font; /* Translate to char position. */ x = vd->vd_mx / vf->vf_width; y = vd->vd_my / vf->vf_height; /* Avoid overflow. */ x = MIN(x, 255 - '!'); y = MIN(y, 255 - '!'); mouseb[3] = ' ' + button; mouseb[4] = '!' + x; mouseb[5] = '!' + y; for (i = 0; i < sizeof(mouseb); i++) terminal_input_char(vw->vw_terminal, mouseb[i]); } static void vt_mouse_terminput(struct vt_device *vd, int type, int x, int y, int event, int cnt) { switch (type) { case MOUSE_BUTTON_EVENT: if (cnt > 0) { /* Mouse button pressed. */ if (event & MOUSE_BUTTON1DOWN) vt_mouse_terminput_button(vd, 0); if (event & MOUSE_BUTTON2DOWN) vt_mouse_terminput_button(vd, 1); if (event & MOUSE_BUTTON3DOWN) vt_mouse_terminput_button(vd, 2); } else { /* Mouse button released. */ vt_mouse_terminput_button(vd, 3); } break; #ifdef notyet case MOUSE_MOTION_EVENT: if (mouse->u.data.z < 0) { /* Scroll up. */ sc_mouse_input_button(vd, 64); } else if (mouse->u.data.z > 0) { /* Scroll down. 
*/ sc_mouse_input_button(vd, 65); } break; #endif } } static void vt_mouse_paste() { term_char_t *buf; int i, len; len = VD_PASTEBUFLEN(main_vd); buf = VD_PASTEBUF(main_vd); len /= sizeof(term_char_t); for (i = 0; i < len; i++) { if (buf[i] == '\0') continue; terminal_input_char(main_vd->vd_curwindow->vw_terminal, buf[i]); } } void vt_mouse_event(int type, int x, int y, int event, int cnt, int mlevel) { struct vt_device *vd; struct vt_window *vw; struct vt_font *vf; term_pos_t size; int len, mark; vd = main_vd; vw = vd->vd_curwindow; vf = vw->vw_font; mark = 0; if (vw->vw_flags & (VWF_MOUSE_HIDE | VWF_GRAPHICS)) /* * Either the mouse is disabled, or the window is in * "graphics mode". The graphics mode is usually set by * an X server, using the KDSETMODE ioctl. */ return; if (vf == NULL) /* Text mode. */ return; /* * TODO: add flag about pointer position changed, to not redraw chars * under mouse pointer when nothing changed. */ if (vw->vw_mouse_level > 0) vt_mouse_terminput(vd, type, x, y, event, cnt); switch (type) { case MOUSE_ACTION: case MOUSE_MOTION_EVENT: /* Movement */ x += vd->vd_mx; y += vd->vd_my; vt_termsize(vd, vf, &size); /* Apply limits. */ x = MAX(x, 0); y = MAX(y, 0); x = MIN(x, (size.tp_col * vf->vf_width) - 1); y = MIN(y, (size.tp_row * vf->vf_height) - 1); vd->vd_mx = x; vd->vd_my = y; if (vd->vd_mstate & MOUSE_BUTTON1DOWN) vtbuf_set_mark(&vw->vw_buf, VTB_MARK_MOVE, vd->vd_mx / vf->vf_width, vd->vd_my / vf->vf_height); vt_resume_flush_timer(vw, 0); return; /* Done */ case MOUSE_BUTTON_EVENT: /* Buttons */ break; default: return; /* Done */ } switch (event) { case MOUSE_BUTTON1DOWN: switch (cnt % 4) { case 0: /* up */ mark = VTB_MARK_END; break; case 1: /* single click: start cut operation */ mark = VTB_MARK_START; break; case 2: /* double click: cut a word */ mark = VTB_MARK_WORD; break; case 3: /* triple click: cut a line */ mark = VTB_MARK_ROW; break; } break; case VT_MOUSE_PASTEBUTTON: switch (cnt) { case 0: /* up */ break; default: vt_mouse_paste(); break; } return; /* Done */ case VT_MOUSE_EXTENDBUTTON: switch (cnt) { case 0: /* up */ if (!(vd->vd_mstate & MOUSE_BUTTON1DOWN)) mark = VTB_MARK_EXTEND; else mark = 0; break; default: mark = VTB_MARK_EXTEND; break; } break; default: return; /* Done */ } /* Save buttons state. */ if (cnt > 0) vd->vd_mstate |= event; else vd->vd_mstate &= ~event; if (vtbuf_set_mark(&vw->vw_buf, mark, vd->vd_mx / vf->vf_width, vd->vd_my / vf->vf_height) == 1) { /* * We have something marked to copy, so update pointer to * window with selection. */ vt_resume_flush_timer(vw, 0); switch (mark) { case VTB_MARK_END: case VTB_MARK_WORD: case VTB_MARK_ROW: case VTB_MARK_EXTEND: break; default: /* Other types of mark do not require to copy data. */ return; } /* Get current selection size in bytes. */ len = vtbuf_get_marked_len(&vw->vw_buf); if (len <= 0) return; /* Reallocate buffer only if old one is too small. */ if (len > VD_PASTEBUFSZ(vd)) { VD_PASTEBUF(vd) = realloc(VD_PASTEBUF(vd), len, M_VT, M_WAITOK | M_ZERO); /* Update buffer size. */ VD_PASTEBUFSZ(vd) = len; } /* Request copy/paste buffer data, no more than `len' */ vtbuf_extract_marked(&vw->vw_buf, VD_PASTEBUF(vd), VD_PASTEBUFSZ(vd)); VD_PASTEBUFLEN(vd) = len; /* XXX VD_PASTEBUF(vd) have to be freed on shutdown/unload. 
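The escape sequence that vt_mouse_terminput_button() builds above is the xterm-style X10 mouse report: ESC [ M, then the button number plus ' ', then the 0-based character column and row each offset by '!'. A left-button press at character cell (10, 5), for example, comes out as 0x1b '[' 'M' 0x20 '+' '&'. A small, self-contained encoder using those assumed coordinates:

/* Illustrative only: the report encoding used by vt_mouse_terminput_button(). */
#include <stdio.h>

static void
report(int button, int col, int row)
{
	unsigned char seq[6] = { 0x1b, '[', 'M', 0, 0, 0 };

	seq[3] = ' ' + button;	/* 0/1/2 = left/middle/right, 3 = release */
	seq[4] = '!' + col;	/* 0-based character column */
	seq[5] = '!' + row;	/* 0-based character row */
	fwrite(seq, 1, sizeof(seq), stdout);
}

int
main(void)
{
	report(0, 10, 5);	/* press at cell (10, 5) */
	report(3, 10, 5);	/* release */
	return (0);
}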
*/ } } void vt_mouse_state(int show) { struct vt_device *vd; struct vt_window *vw; vd = main_vd; vw = vd->vd_curwindow; switch (show) { case VT_MOUSE_HIDE: vw->vw_flags |= VWF_MOUSE_HIDE; break; case VT_MOUSE_SHOW: vw->vw_flags &= ~VWF_MOUSE_HIDE; break; } /* Mark mouse position as dirty. */ vt_mark_mouse_position_as_dirty(vd, false); vt_resume_flush_timer(vw, 0); } #endif static int vtterm_mmap(struct terminal *tm, vm_ooffset_t offset, vm_paddr_t * paddr, int nprot, vm_memattr_t *memattr) { struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; if (vd->vd_driver->vd_fb_mmap) return (vd->vd_driver->vd_fb_mmap(vd, offset, paddr, nprot, memattr)); return (ENXIO); } static int vtterm_ioctl(struct terminal *tm, u_long cmd, caddr_t data, struct thread *td) { struct vt_window *vw = tm->tm_softc; struct vt_device *vd = vw->vw_device; keyboard_t *kbd; int error, i, s; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) int ival; switch (cmd) { case _IO('v', 4): cmd = VT_RELDISP; break; case _IO('v', 5): cmd = VT_ACTIVATE; break; case _IO('v', 6): cmd = VT_WAITACTIVE; break; case _IO('K', 20): cmd = KDSKBSTATE; break; case _IO('K', 67): cmd = KDSETRAD; break; case _IO('K', 7): cmd = KDSKBMODE; break; case _IO('K', 8): cmd = KDMKTONE; break; case _IO('K', 10): cmd = KDSETMODE; break; case _IO('K', 13): cmd = KDSBORDER; break; case _IO('K', 63): cmd = KIOCSOUND; break; case _IO('K', 66): cmd = KDSETLED; break; case _IO('c', 104): cmd = CONS_SETWINORG; break; case _IO('c', 110): cmd = CONS_SETKBD; break; default: goto skip_thunk; } ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; skip_thunk: #endif switch (cmd) { case KDSETRAD: /* set keyboard repeat & delay rates (old) */ if (*(int *)data & ~0x7f) return (EINVAL); /* FALLTHROUGH */ case GIO_KEYMAP: case PIO_KEYMAP: case GIO_DEADKEYMAP: case PIO_DEADKEYMAP: case GETFKEY: case SETFKEY: case KDGKBINFO: case KDGKBTYPE: case KDGETREPEAT: /* get keyboard repeat & delay rates */ case KDSETREPEAT: /* set keyboard repeat & delay rates (new) */ case KBADDKBD: /* add/remove keyboard to/from mux */ case KBRELKBD: { error = 0; mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) error = kbdd_ioctl(kbd, cmd, data); mtx_unlock(&Giant); if (error == ENOIOCTL) { if (cmd == KDGKBTYPE) { /* always return something? 
XXX */ *(int *)data = 0; } else { return (ENODEV); } } return (error); } case KDGKBSTATE: { /* get keyboard state (locks) */ error = 0; if (vw == vd->vd_curwindow) { mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) error = vt_save_kbd_state(vw, kbd); mtx_unlock(&Giant); if (error != 0) return (error); } *(int *)data = vw->vw_kbdstate & LOCK_MASK; return (error); } case KDSKBSTATE: { /* set keyboard state (locks) */ int state; state = *(int *)data; if (state & ~LOCK_MASK) return (EINVAL); vw->vw_kbdstate &= ~LOCK_MASK; vw->vw_kbdstate |= state; error = 0; if (vw == vd->vd_curwindow) { mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) error = vt_update_kbd_state(vw, kbd); mtx_unlock(&Giant); } return (error); } case KDGETLED: { /* get keyboard LED status */ error = 0; if (vw == vd->vd_curwindow) { mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) error = vt_save_kbd_leds(vw, kbd); mtx_unlock(&Giant); if (error != 0) return (error); } *(int *)data = vw->vw_kbdstate & LED_MASK; return (error); } case KDSETLED: { /* set keyboard LED status */ int leds; leds = *(int *)data; if (leds & ~LED_MASK) return (EINVAL); vw->vw_kbdstate &= ~LED_MASK; vw->vw_kbdstate |= leds; error = 0; if (vw == vd->vd_curwindow) { mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) error = vt_update_kbd_leds(vw, kbd); mtx_unlock(&Giant); } return (error); } case KDGETMODE: *(int *)data = (vw->vw_flags & VWF_GRAPHICS) ? KD_GRAPHICS : KD_TEXT; return (0); case KDGKBMODE: { error = 0; if (vw == vd->vd_curwindow) { mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) error = vt_save_kbd_mode(vw, kbd); mtx_unlock(&Giant); if (error != 0) return (error); } *(int *)data = vw->vw_kbdmode; return (error); } case KDSKBMODE: { int mode; mode = *(int *)data; switch (mode) { case K_XLATE: case K_RAW: case K_CODE: vw->vw_kbdmode = mode; error = 0; if (vw == vd->vd_curwindow) { mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) error = vt_update_kbd_mode(vw, kbd); mtx_unlock(&Giant); } return (error); default: return (EINVAL); } } case FBIOGTYPE: case FBIO_GETWINORG: /* get frame buffer window origin */ case FBIO_GETDISPSTART: /* get display start address */ case FBIO_GETLINEWIDTH: /* get scan line width in bytes */ case FBIO_BLANK: /* blank display */ if (vd->vd_driver->vd_fb_ioctl) return (vd->vd_driver->vd_fb_ioctl(vd, cmd, data, td)); break; case CONS_BLANKTIME: /* XXX */ return (0); case CONS_HISTORY: if (*(int *)data < 0) return EINVAL; if (*(int *)data != vd->vd_curwindow->vw_buf.vb_history_size) vtbuf_sethistory_size(&vd->vd_curwindow->vw_buf, *(int *)data); return 0; case CONS_GET: /* XXX */ *(int *)data = M_CG640x480; return (0); case CONS_BELLTYPE: /* set bell type sound */ if ((*(int *)data) & CONS_QUIET_BELL) vd->vd_flags |= VDF_QUIET_BELL; else vd->vd_flags &= ~VDF_QUIET_BELL; return (0); case CONS_GETINFO: { vid_info_t *vi = (vid_info_t *)data; if (vi->size != sizeof(struct vid_info)) return (EINVAL); if (vw == vd->vd_curwindow) { mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) vt_save_kbd_state(vw, kbd); mtx_unlock(&Giant); } vi->m_num = vd->vd_curwindow->vw_number + 1; vi->mk_keylock = vw->vw_kbdstate & LOCK_MASK; /* XXX: other fields! 
*/ return (0); } case CONS_GETVERS: *(int *)data = 0x200; return (0); case CONS_MODEINFO: /* XXX */ return (0); case CONS_MOUSECTL: { mouse_info_t *mouse = (mouse_info_t*)data; /* * All the commands except MOUSE_SHOW nd MOUSE_HIDE * should not be applied to individual TTYs, but only to * consolectl. */ switch (mouse->operation) { case MOUSE_HIDE: if (vd->vd_flags & VDF_MOUSECURSOR) { vd->vd_flags &= ~VDF_MOUSECURSOR; #ifndef SC_NO_CUTPASTE vt_mouse_state(VT_MOUSE_HIDE); #endif } return (0); case MOUSE_SHOW: if (!(vd->vd_flags & VDF_MOUSECURSOR)) { vd->vd_flags |= VDF_MOUSECURSOR; vd->vd_mx = vd->vd_width / 2; vd->vd_my = vd->vd_height / 2; #ifndef SC_NO_CUTPASTE vt_mouse_state(VT_MOUSE_SHOW); #endif } return (0); default: return (EINVAL); } } case PIO_VFONT: { struct vt_font *vf; if (vd->vd_flags & VDF_TEXTMODE) return (ENOTSUP); error = vtfont_load((void *)data, &vf); if (error != 0) return (error); error = vt_change_font(vw, vf); vtfont_unref(vf); return (error); } case PIO_VFONT_DEFAULT: { /* Reset to default font. */ error = vt_change_font(vw, &vt_font_default); return (error); } case GIO_SCRNMAP: { scrmap_t *sm = (scrmap_t *)data; /* We don't have screen maps, so return a handcrafted one. */ for (i = 0; i < 256; i++) sm->scrmap[i] = i; return (0); } case KDSETMODE: /* * FIXME: This implementation is incomplete compared to * syscons. */ switch (*(int *)data) { case KD_TEXT: case KD_TEXT1: case KD_PIXEL: vw->vw_flags &= ~VWF_GRAPHICS; break; case KD_GRAPHICS: vw->vw_flags |= VWF_GRAPHICS; break; } return (0); case KDENABIO: /* allow io operations */ error = priv_check(td, PRIV_IO); if (error != 0) return (error); error = securelevel_gt(td->td_ucred, 0); if (error != 0) return (error); #if defined(__i386__) td->td_frame->tf_eflags |= PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags |= PSL_IOPL; #endif return (0); case KDDISABIO: /* disallow io operations (default) */ #if defined(__i386__) td->td_frame->tf_eflags &= ~PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags &= ~PSL_IOPL; #endif return (0); case KDMKTONE: /* sound the bell */ vtterm_beep(tm, *(u_int *)data); return (0); case KIOCSOUND: /* make tone (*data) hz */ /* TODO */ return (0); case CONS_SETKBD: /* set the new keyboard */ mtx_lock(&Giant); error = 0; if (vd->vd_keyboard != *(int *)data) { kbd = kbd_get_keyboard(*(int *)data); if (kbd == NULL) { mtx_unlock(&Giant); return (EINVAL); } i = kbd_allocate(kbd->kb_name, kbd->kb_unit, (void *)vd, vt_kbdevent, vd); if (i >= 0) { if (vd->vd_keyboard != -1) { kbd = kbd_get_keyboard(vd->vd_keyboard); vt_save_kbd_state(vd->vd_curwindow, kbd); kbd_release(kbd, (void *)vd); } kbd = kbd_get_keyboard(i); vd->vd_keyboard = i; vt_update_kbd_mode(vd->vd_curwindow, kbd); vt_update_kbd_state(vd->vd_curwindow, kbd); } else { error = EPERM; /* XXX */ } } mtx_unlock(&Giant); return (error); case CONS_RELKBD: /* release the current keyboard */ mtx_lock(&Giant); error = 0; if (vd->vd_keyboard != -1) { kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd == NULL) { mtx_unlock(&Giant); return (EINVAL); } vt_save_kbd_state(vd->vd_curwindow, kbd); error = kbd_release(kbd, (void *)vd); if (error == 0) { vd->vd_keyboard = -1; } } mtx_unlock(&Giant); return (error); case VT_ACTIVATE: { int win; win = *(int *)data - 1; DPRINTF(5, "%s%d: VT_ACTIVATE ttyv%d ", SC_DRIVER_NAME, VT_UNIT(vw), win); if ((win >= VT_MAXWINDOWS) || (win < 0)) return (EINVAL); return (vt_proc_window_switch(vd->vd_windows[win])); } case VT_GETACTIVE: *(int *)data = vd->vd_curwindow->vw_number + 1; return (0); case 
VT_GETINDEX: *(int *)data = vw->vw_number + 1; return (0); case VT_LOCKSWITCH: /* TODO: Check current state, switching can be in progress. */ if ((*(int *)data) == 0x01) vw->vw_flags |= VWF_VTYLOCK; else if ((*(int *)data) == 0x02) vw->vw_flags &= ~VWF_VTYLOCK; else return (EINVAL); return (0); case VT_OPENQRY: VT_LOCK(vd); for (i = 0; i < VT_MAXWINDOWS; i++) { vw = vd->vd_windows[i]; if (vw == NULL) continue; if (!(vw->vw_flags & VWF_OPENED)) { *(int *)data = vw->vw_number + 1; VT_UNLOCK(vd); return (0); } } VT_UNLOCK(vd); return (EINVAL); case VT_WAITACTIVE: { unsigned int idx; error = 0; idx = *(unsigned int *)data; if (idx > VT_MAXWINDOWS) return (EINVAL); if (idx > 0) vw = vd->vd_windows[idx - 1]; VT_LOCK(vd); while (vd->vd_curwindow != vw && error == 0) error = cv_wait_sig(&vd->vd_winswitch, &vd->vd_lock); VT_UNLOCK(vd); return (error); } case VT_SETMODE: { /* set screen switcher mode */ struct vt_mode *mode; struct proc *p1; mode = (struct vt_mode *)data; DPRINTF(5, "%s%d: VT_SETMODE ", SC_DRIVER_NAME, VT_UNIT(vw)); if (vw->vw_smode.mode == VT_PROCESS) { p1 = pfind(vw->vw_pid); if (vw->vw_proc == p1 && vw->vw_proc != td->td_proc) { if (p1) PROC_UNLOCK(p1); DPRINTF(5, "error EPERM\n"); return (EPERM); } if (p1) PROC_UNLOCK(p1); } if (mode->mode == VT_AUTO) { vw->vw_smode.mode = VT_AUTO; vw->vw_proc = NULL; vw->vw_pid = 0; DPRINTF(5, "VT_AUTO, "); if (vw == vw->vw_device->vd_windows[VT_CONSWINDOW]) cnavailable(vw->vw_terminal->consdev, TRUE); /* were we in the middle of the vty switching process? */ if (finish_vt_rel(vw, TRUE, &s) == 0) DPRINTF(5, "reset WAIT_REL, "); if (finish_vt_acq(vw) == 0) DPRINTF(5, "reset WAIT_ACQ, "); return (0); } else if (mode->mode == VT_PROCESS) { if (!ISSIGVALID(mode->relsig) || !ISSIGVALID(mode->acqsig) || !ISSIGVALID(mode->frsig)) { DPRINTF(5, "error EINVAL\n"); return (EINVAL); } DPRINTF(5, "VT_PROCESS %d, ", td->td_proc->p_pid); bcopy(data, &vw->vw_smode, sizeof(struct vt_mode)); vw->vw_proc = td->td_proc; vw->vw_pid = vw->vw_proc->p_pid; if (vw == vw->vw_device->vd_windows[VT_CONSWINDOW]) cnavailable(vw->vw_terminal->consdev, FALSE); } else { DPRINTF(5, "VT_SETMODE failed, unknown mode %d\n", mode->mode); return (EINVAL); } DPRINTF(5, "\n"); return (0); } case VT_GETMODE: /* get screen switcher mode */ bcopy(&vw->vw_smode, data, sizeof(struct vt_mode)); return (0); case VT_RELDISP: /* screen switcher ioctl */ /* * This must be the current vty which is in the VT_PROCESS * switching mode... */ if ((vw != vd->vd_curwindow) || (vw->vw_smode.mode != VT_PROCESS)) { return (EINVAL); } /* ...and this process is controlling it. */ if (vw->vw_proc != td->td_proc) { return (EPERM); } error = EINVAL; switch(*(int *)data) { case VT_FALSE: /* user refuses to release screen, abort */ if ((error = finish_vt_rel(vw, FALSE, &s)) == 0) DPRINTF(5, "%s%d: VT_RELDISP: VT_FALSE\n", SC_DRIVER_NAME, VT_UNIT(vw)); break; case VT_TRUE: /* user has released screen, go on */ /* finish_vt_rel(..., TRUE, ...) 
should not be locked */ if (vw->vw_flags & VWF_SWWAIT_REL) { if ((error = finish_vt_rel(vw, TRUE, &s)) == 0) DPRINTF(5, "%s%d: VT_RELDISP: VT_TRUE\n", SC_DRIVER_NAME, VT_UNIT(vw)); } else { error = EINVAL; } return (error); case VT_ACKACQ: /* acquire acknowledged, switch completed */ if ((error = finish_vt_acq(vw)) == 0) DPRINTF(5, "%s%d: VT_RELDISP: VT_ACKACQ\n", SC_DRIVER_NAME, VT_UNIT(vw)); break; default: break; } return (error); } return (ENOIOCTL); } static struct vt_window * vt_allocate_window(struct vt_device *vd, unsigned int window) { struct vt_window *vw; struct terminal *tm; term_pos_t size; struct winsize wsz; vw = malloc(sizeof *vw, M_VT, M_WAITOK|M_ZERO); vw->vw_device = vd; vw->vw_number = window; vw->vw_kbdmode = K_XLATE; if ((vd->vd_flags & VDF_TEXTMODE) == 0) { vw->vw_font = vtfont_ref(&vt_font_default); vt_compute_drawable_area(vw); } vt_termsize(vd, vw->vw_font, &size); vt_winsize(vd, vw->vw_font, &wsz); vtbuf_init(&vw->vw_buf, &size); tm = vw->vw_terminal = terminal_alloc(&vt_termclass, vw); terminal_set_winsize(tm, &wsz); vd->vd_windows[window] = vw; callout_init(&vw->vw_proc_dead_timer, 0); return (vw); } void vt_upgrade(struct vt_device *vd) { struct vt_window *vw; unsigned int i; int register_handlers; if (!vty_enabled(VTY_VT)) return; if (main_vd->vd_driver == NULL) return; for (i = 0; i < VT_MAXWINDOWS; i++) { vw = vd->vd_windows[i]; if (vw == NULL) { /* New window. */ vw = vt_allocate_window(vd, i); } if (!(vw->vw_flags & VWF_READY)) { callout_init(&vw->vw_proc_dead_timer, 0); terminal_maketty(vw->vw_terminal, "v%r", VT_UNIT(vw)); vw->vw_flags |= VWF_READY; if (vw->vw_flags & VWF_CONSOLE) { /* For existing console window. */ EVENTHANDLER_REGISTER(shutdown_pre_sync, vt_window_switch, vw, SHUTDOWN_PRI_DEFAULT); } } } VT_LOCK(vd); if (vd->vd_curwindow == NULL) vd->vd_curwindow = vd->vd_windows[VT_CONSWINDOW]; register_handlers = 0; if (!(vd->vd_flags & VDF_ASYNC)) { /* Attach keyboard. */ vt_allocate_keyboard(vd); /* Init 25 Hz timer. */ callout_init_mtx(&vd->vd_timer, &vd->vd_lock, 0); /* * Start timer when everything ready. * Note that the operations here are purposefully ordered. * We need to ensure vd_timer_armed is non-zero before we set * the VDF_ASYNC flag. That prevents this function from * racing with vt_resume_flush_timer() to update the * callout structure. */ atomic_add_acq_int(&vd->vd_timer_armed, 1); vd->vd_flags |= VDF_ASYNC; callout_reset(&vd->vd_timer, hz / VT_TIMERFREQ, vt_timer, vd); register_handlers = 1; } VT_UNLOCK(vd); /* Refill settings with new sizes. */ vt_resize(vd); if (register_handlers) { /* Register suspend/resume handlers. */ EVENTHANDLER_REGISTER(power_suspend_early, vt_suspend_handler, vd, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(power_resume, vt_resume_handler, vd, EVENTHANDLER_PRI_ANY); } } static void vt_resize(struct vt_device *vd) { struct vt_window *vw; int i; for (i = 0; i < VT_MAXWINDOWS; i++) { vw = vd->vd_windows[i]; VT_LOCK(vd); /* Assign default font to window, if not textmode. */ if (!(vd->vd_flags & VDF_TEXTMODE) && vw->vw_font == NULL) vw->vw_font = vtfont_ref(&vt_font_default); VT_UNLOCK(vd); /* Resize terminal windows */ while (vt_change_font(vw, vw->vw_font) == EBUSY) { DPRINTF(100, "%s: vt_change_font() is busy, " "window %d\n", __func__, i); } } } static void vt_replace_backend(const struct vt_driver *drv, void *softc) { struct vt_device *vd; vd = main_vd; if (vd->vd_flags & VDF_ASYNC) { /* Stop vt_flush periodic task. 
		 */
		VT_LOCK(vd);
		vt_suspend_flush_timer(vd);
		VT_UNLOCK(vd);

		/*
		 * Mute the current terminal until we are done.
		 * vt_change_font (called from vt_resize) will unmute it.
		 */
		terminal_mute(vd->vd_curwindow->vw_terminal, 1);
	}

	/*
	 * Reset the VDF_TEXTMODE flag; drivers that require it (vt_vga)
	 * will set it.
	 */
	VT_LOCK(vd);
	vd->vd_flags &= ~VDF_TEXTMODE;

	if (drv != NULL) {
		/*
		 * We want to upgrade from the current driver to the
		 * given driver.
		 */
		vd->vd_prev_driver = vd->vd_driver;
		vd->vd_prev_softc = vd->vd_softc;
		vd->vd_driver = drv;
		vd->vd_softc = softc;

		vd->vd_driver->vd_init(vd);
	} else if (vd->vd_prev_driver != NULL && vd->vd_prev_softc != NULL) {
		/*
		 * No driver given: we want to downgrade to the previous
		 * driver.
		 */
		const struct vt_driver *old_drv;
		void *old_softc;

		old_drv = vd->vd_driver;
		old_softc = vd->vd_softc;

		vd->vd_driver = vd->vd_prev_driver;
		vd->vd_softc = vd->vd_prev_softc;
		vd->vd_prev_driver = NULL;
		vd->vd_prev_softc = NULL;

		vd->vd_flags |= VDF_DOWNGRADE;

		vd->vd_driver->vd_init(vd);

		if (old_drv->vd_fini)
			old_drv->vd_fini(vd, old_softc);

		vd->vd_flags &= ~VDF_DOWNGRADE;
	}

	VT_UNLOCK(vd);

	/* Update window sizes and initialize the remaining items. */
	vt_upgrade(vd);

#ifdef DEV_SPLASH
	if (vd->vd_flags & VDF_SPLASH)
		vtterm_splash(vd);
#endif

	if (vd->vd_flags & VDF_ASYNC) {
		/* Allow characters to be output again. */
		terminal_mute(vd->vd_curwindow->vw_terminal, 0);

		/* Rerun timer for screen updates. */
		vt_resume_flush_timer(vd->vd_curwindow, 0);
	}

	/*
	 * Register as console. If it is already registered, cnadd() will
	 * ignore it.
	 */
	termcn_cnregister(vd->vd_windows[VT_CONSWINDOW]->vw_terminal);
}

static void
vt_suspend_handler(void *priv)
{
	struct vt_device *vd;

	vd = priv;
	vd->vd_flags |= VDF_SUSPENDED;
	if (vd->vd_driver != NULL && vd->vd_driver->vd_suspend != NULL)
		vd->vd_driver->vd_suspend(vd);
}

static void
vt_resume_handler(void *priv)
{
	struct vt_device *vd;

	vd = priv;
	if (vd->vd_driver != NULL && vd->vd_driver->vd_resume != NULL)
		vd->vd_driver->vd_resume(vd);
	vd->vd_flags &= ~VDF_SUSPENDED;
}

void
vt_allocate(const struct vt_driver *drv, void *softc)
{

	if (!vty_enabled(VTY_VT))
		return;

	if (main_vd->vd_driver == NULL) {
		main_vd->vd_driver = drv;
		printf("VT: initialize with new VT driver \"%s\".\n",
		    drv->vd_name);
	} else {
		/*
		 * Check whether we have the right to replace the current
		 * driver. For example, it is a bad idea to replace a KMS
		 * driver with the generic VGA one.
		 */
		if (drv->vd_priority <= main_vd->vd_driver->vd_priority) {
			printf("VT: Driver priority %d too low. Current %d\n ",
			    drv->vd_priority, main_vd->vd_driver->vd_priority);
			return;
		}
		printf("VT: Replacing driver \"%s\" with new \"%s\".\n",
		    main_vd->vd_driver->vd_name, drv->vd_name);
	}

	vt_replace_backend(drv, softc);
}

void
vt_deallocate(const struct vt_driver *drv, void *softc)
{

	if (!vty_enabled(VTY_VT))
		return;

	if (main_vd->vd_prev_driver == NULL ||
	    main_vd->vd_driver != drv ||
	    main_vd->vd_softc != softc)
		return;

	printf("VT: Switching back from \"%s\" to \"%s\".\n",
	    main_vd->vd_driver->vd_name, main_vd->vd_prev_driver->vd_name);

	vt_replace_backend(NULL, NULL);
}

void
vt_suspend(struct vt_device *vd)
{
	int error;

	if (vt_suspendswitch == 0)
		return;
	/* Save the current window. */
	vd->vd_savedwindow = vd->vd_curwindow;
	/* Ask the holding process to free the window and switch to the console window. */
	vt_proc_window_switch(vd->vd_windows[VT_CONSWINDOW]);

	/* Wait for the window switch to complete.
*/ error = 0; VT_LOCK(vd); while (vd->vd_curwindow != vd->vd_windows[VT_CONSWINDOW] && error == 0) error = cv_wait_sig(&vd->vd_winswitch, &vd->vd_lock); VT_UNLOCK(vd); } void vt_resume(struct vt_device *vd) { if (vt_suspendswitch == 0) return; /* Switch back to saved window, if any */ vt_proc_window_switch(vd->vd_savedwindow); vd->vd_savedwindow = NULL; } Index: projects/clang900-import/sys/fs/nfsclient/nfs_clport.c =================================================================== --- projects/clang900-import/sys/fs/nfsclient/nfs_clport.c (revision 352536) +++ projects/clang900-import/sys/fs/nfsclient/nfs_clport.c (revision 352537) @@ -1,1387 +1,1390 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include /* * generally, I don't like #includes inside .h files, but it seems to * be the easiest way to handle the port. 
*/ #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfscl_attrcache_flush_done_probe; uint32_t nfscl_attrcache_flush_done_id; dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfscl_attrcache_get_hit_probe; uint32_t nfscl_attrcache_get_hit_id; dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfscl_attrcache_get_miss_probe; uint32_t nfscl_attrcache_get_miss_id; dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfscl_attrcache_load_done_probe; uint32_t nfscl_attrcache_load_done_id; #endif /* !KDTRACE_HOOKS */ extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern struct vop_vector newnfs_vnodeops; extern struct vop_vector newnfs_fifoops; extern uma_zone_t newnfsnode_zone; extern struct buf_ops buf_ops_newnfs; extern uma_zone_t ncl_pbuf_zone; extern short nfsv4_cbport; extern int nfscl_enablecallb; extern int nfs_numnfscbd; extern int nfscl_inited; struct mtx ncl_iod_mutex; NFSDLOCKMUTEX; extern struct mtx nfsrv_dslock_mtx; extern void (*ncl_call_invalcaches)(struct vnode *); SYSCTL_DECL(_vfs_nfs); static int ncl_fileid_maxwarnings = 10; SYSCTL_INT(_vfs_nfs, OID_AUTO, fileid_maxwarnings, CTLFLAG_RWTUN, &ncl_fileid_maxwarnings, 0, "Limit fileid corruption warnings; 0 is off; -1 is unlimited"); static volatile int ncl_fileid_nwarnings; static void nfscl_warn_fileid(struct nfsmount *, struct nfsvattr *, struct nfsvattr *); /* * Comparison function for vfs_hash functions. */ int newnfs_vncmpf(struct vnode *vp, void *arg) { struct nfsfh *nfhp = (struct nfsfh *)arg; struct nfsnode *np = VTONFS(vp); if (np->n_fhp->nfh_len != nfhp->nfh_len || NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len)) return (1); return (0); } /* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! * In all cases, a pointer to a * nfsnode structure is returned. * This variant takes a "struct nfsfh *" as second argument and uses * that structure up, either by hanging off the nfsnode or FREEing it. */ int nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp, struct componentname *cnp, struct thread *td, struct nfsnode **npp, void *stuff, int lkflags) { struct nfsnode *np, *dnp; struct vnode *vp, *nvp; struct nfsv4node *newd, *oldd; int error; u_int hash; struct nfsmount *nmp; nmp = VFSTONFS(mntp); dnp = VTONFS(dvp); *npp = NULL; hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); error = vfs_hash_get(mntp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { /* * I believe there is a slight chance that vgonel() could * get called on this vnode between when NFSVOPLOCK() drops * the VI_LOCK() and vget() acquires it again, so that it * hasn't yet had v_usecount incremented. If this were to * happen, the VI_DOOMED flag would be set, so check for * that here. Since we now have the v_usecount incremented, * we should be ok until we vrele() it, if the VI_DOOMED * flag isn't set now. */ VI_LOCK(nvp); if ((nvp->v_iflag & VI_DOOMED)) { VI_UNLOCK(nvp); vrele(nvp); error = ENOENT; } else { VI_UNLOCK(nvp); } } if (error) { free(nfhp, M_NFSFH); return (error); } if (nvp != NULL) { np = VTONFS(nvp); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. 
*/ oldd = newd = NULL; if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { newd = malloc( sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); NFSLOCKNODE(np); if (newd != NULL && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { oldd = np->n_v4; np->n_v4 = newd; newd = NULL; np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } NFSUNLOCKNODE(np); } if (newd != NULL) free(newd, M_NFSV4NODE); if (oldd != NULL) free(oldd, M_NFSV4NODE); *npp = np; free(nfhp, M_NFSFH); return (0); } np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO); error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp); if (error) { uma_zfree(newnfsnode_zone, np); free(nfhp, M_NFSFH); return (error); } vp = nvp; KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0")); vp->v_bufobj.bo_ops = &buf_ops_newnfs; vp->v_data = np; np->n_vnode = vp; /* * Initialize the mutex even if the vnode is going to be a loser. * This simplifies the logic in reclaim, which can then unconditionally * destroy the mutex (in the case of the loser, or if hash_insert * happened to return an error no special casing is needed). */ mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK); lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE | LK_CANRECURSE); /* * Are we getting the root? If so, make sure the vnode flags * are correct */ if ((nfhp->nfh_len == nmp->nm_fhsize) && !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len)) { if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; } np->n_fhp = nfhp; /* * For NFSv4, we have to attach the directory file handle and * file name, so that Open Ops can be done later. */ if (nmp->nm_flag & NFSMNT_NFSV4) { np->n_v4 = malloc(sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } else { np->n_v4 = NULL; } /* * NFS supports recursive and shared locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL); VN_LOCK_AREC(vp); VN_LOCK_ASHARE(vp); error = insmntque(vp, mntp); if (error != 0) { *npp = NULL; mtx_destroy(&np->n_mtx); lockdestroy(&np->n_excl); free(nfhp, M_NFSFH); if (np->n_v4 != NULL) free(np->n_v4, M_NFSV4NODE); uma_zfree(newnfsnode_zone, np); return (error); } error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); /* vfs_hash_insert() vput()'s the losing vnode */ return (0); } *npp = np; return (0); } /* * Another variant of nfs_nget(). This one is only used by reopen. It * takes almost the same args as nfs_nget(), but only succeeds if an entry * exists in the cache. 
(Since files should already be "open" with a * vnode ref cnt on the node when reopen calls this, it should always * succeed.) * Also, don't get a vnode lock, since it may already be locked by some * other process that is handling it. This is ok, since all other threads * on the client are blocked by the nfsc_lock being exclusively held by the * caller of this function. */ int nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize, struct thread *td, struct nfsnode **npp) { struct vnode *nvp; u_int hash; struct nfsfh *nfhp; int error; *npp = NULL; /* For forced dismounts, just return error. */ if (NFSCL_FORCEDISM(mntp)) return (EINTR); nfhp = malloc(sizeof (struct nfsfh) + fhsize, M_NFSFH, M_WAITOK); bcopy(fhp, &nfhp->nfh_fh[0], fhsize); nfhp->nfh_len = fhsize; hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT); /* * First, try to get the vnode locked, but don't block for the lock. */ error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { NFSVOPUNLOCK(nvp, 0); } else if (error == EBUSY) { /* * It is safe so long as a vflush() with * FORCECLOSE has not been done. Since the Renew thread is * stopped and the MNTK_UNMOUNTF flag is set before doing * a vflush() with FORCECLOSE, we should be ok here. */ if (NFSCL_FORCEDISM(mntp)) error = EINTR; else { vfs_hash_ref(mntp, hash, td, &nvp, newnfs_vncmpf, nfhp); if (nvp == NULL) { error = ENOENT; } else if ((nvp->v_iflag & VI_DOOMED) != 0) { error = ENOENT; vrele(nvp); } else { error = 0; } } } free(nfhp, M_NFSFH); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); return (0); } return (EINVAL); } static void nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap, struct nfsvattr *newnap) { int off; if (ncl_fileid_maxwarnings >= 0 && ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) return; off = 0; if (ncl_fileid_maxwarnings >= 0) { if (++ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) off = 1; } printf("newnfs: server '%s' error: fileid changed. " "fsid %jx:%jx: expected fileid %#jx, got %#jx. " "(BROKEN NFS SERVER OR MIDDLEWARE)\n", nmp->nm_com.nmcom_hostname, (uintmax_t)nmp->nm_fsid[0], (uintmax_t)nmp->nm_fsid[1], (uintmax_t)oldnap->na_fileid, (uintmax_t)newnap->na_fileid); if (off) printf("newnfs: Logged %d times about fileid corruption; " "going quiet to avoid spamming logs excessively. (Limit " "is: %d).\n", ncl_fileid_nwarnings, ncl_fileid_maxwarnings); } /* * Load the attribute cache (that lives in the nfsnode entry) with * the attributes of the second argument and * Iff vaper not NULL * copy the attributes to *vaper * Similar to nfs_loadattrcache(), except the attributes are passed in * instead of being parsed out of the mbuf list. */ int nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, void *stuff, int writeattr, int dontshrink) { struct vnode *vp = *vpp; struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper; struct nfsnode *np; struct nfsmount *nmp; struct timespec mtime_save; + vm_object_t object; u_quad_t nsize; - int setnsize, error, force_fid_err; + int error, force_fid_err; + bool setnsize; error = 0; - setnsize = 0; - nsize = 0; /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. 
*/ np = VTONFS(vp); NFSLOCKNODE(np); if (vp->v_type != nvap->va_type) { vp->v_type = nvap->va_type; if (vp->v_type == VFIFO) vp->v_op = &newnfs_fifoops; np->n_mtime = nvap->va_mtime; } nmp = VFSTONFS(vp->v_mount); vap = &np->n_vattr.na_vattr; mtime_save = vap->va_mtime; if (writeattr) { np->n_vattr.na_filerev = nap->na_filerev; np->n_vattr.na_size = nap->na_size; np->n_vattr.na_mtime = nap->na_mtime; np->n_vattr.na_ctime = nap->na_ctime; np->n_vattr.na_fsid = nap->na_fsid; np->n_vattr.na_mode = nap->na_mode; } else { force_fid_err = 0; KFAIL_POINT_ERROR(DEBUG_FP, nfscl_force_fileid_warning, force_fid_err); /* * BROKEN NFS SERVER OR MIDDLEWARE * * Certain NFS servers (certain old proprietary filers ca. * 2006) or broken middleboxes (e.g. WAN accelerator products) * will respond to GETATTR requests with results for a * different fileid. * * The WAN accelerator we've observed not only serves stale * cache results for a given file, it also occasionally serves * results for wholly different files. This causes surprising * problems; for example the cached size attribute of a file * may truncate down and then back up, resulting in zero * regions in file contents read by applications. We observed * this reliably with Clang and .c files during parallel build. * A pcap revealed packet fragmentation and GETATTR RPC * responses with wholly wrong fileids. */ if ((np->n_vattr.na_fileid != 0 && np->n_vattr.na_fileid != nap->na_fileid) || force_fid_err) { nfscl_warn_fileid(nmp, &np->n_vattr, nap); error = EIDRM; goto out; } NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr, sizeof (struct nfsvattr)); } /* * For NFSv4, if the node's fsid is not equal to the mount point's * fsid, return the low order 32bits of the node's fsid. This * allows getcwd(3) to work. There is a chance that the fsid might * be the same as a local fs, but since this is in an NFS mount * point, I don't think that will cause any problems? */ if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) && (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] || nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) { /* * va_fsid needs to be set to some value derived from * np->n_vattr.na_filesid that is not equal * vp->v_mount->mnt_stat.f_fsid[0], so that it changes * from the value used for the top level server volume * in the mounted subtree. */ vn_fsid(vp, vap); if ((uint32_t)vap->va_fsid == np->n_vattr.na_filesid[0]) vap->va_fsid = hash32_buf( np->n_vattr.na_filesid, 2 * sizeof(uint64_t), 0); } else vn_fsid(vp, vap); np->n_attrstamp = time_second; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (dontshrink && vap->va_size < np->n_size) { /* * We've been told not to shrink the file; * zero np->n_attrstamp to indicate that * the attributes are stale. */ - nsize = vap->va_size = np->n_size; - setnsize = 1; + vap->va_size = np->n_size; np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } else if (np->n_flag & NMODIFIED) { /* * We've modified the file: Use the larger * of our size, and the server's size. */ if (vap->va_size < np->n_size) { vap->va_size = np->n_size; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } - nsize = np->n_size; - setnsize = 1; - } else if (vap->va_size < np->n_size) { - /* - * When shrinking the size, the call to - * vnode_pager_setsize() cannot be done - * with the mutex held, so delay it until - * after the mtx_unlock call. 
- */ - nsize = np->n_size = vap->va_size; - np->n_flag |= NSIZECHANGED; - setnsize = 1; } else { - nsize = np->n_size = vap->va_size; + np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; - setnsize = 1; } } else { np->n_size = vap->va_size; } } /* * The following checks are added to prevent a race between (say) * a READDIR+ and a WRITE. * READDIR+, WRITE requests sent out. * READDIR+ resp, WRITE resp received on client. * However, the WRITE resp was handled before the READDIR+ resp * causing the post op attrs from the write to be loaded first * and the attrs from the READDIR+ to be loaded later. If this * happens, we have stale attrs loaded into the attrcache. * We detect this by for the mtime moving back. We invalidate the * attrcache when this happens. */ if (timespeccmp(&mtime_save, &vap->va_mtime, >)) { /* Size changed or mtime went backwards */ np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (vaper != NULL) { NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } out: #ifdef KDTRACE_HOOKS if (np->n_attrstamp != 0) KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error); #endif + nsize = vap->va_size; + object = vp->v_object; + setnsize = false; + if (object != NULL) { + if (OFF_TO_IDX(nsize + PAGE_MASK) < object->size) { + /* + * When shrinking the size, the call to + * vnode_pager_setsize() cannot be done with + * the mutex held, because we might need to + * wait for a busy page. Delay it until after + * the node is unlocked. + */ + setnsize = true; + } else { + vnode_pager_setsize(vp, nsize); + } + } NFSUNLOCKNODE(np); if (setnsize) vnode_pager_setsize(vp, nsize); return (error); } /* * Fill in the client id name. For these bytes: * 1 - they must be unique * 2 - they should be persistent across client reboots * 1 is more critical than 2 * Use the mount point's unique id plus either the uuid or, if that * isn't set, random junk. */ void nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen) { int uuidlen; /* * First, put in the 64bit mount point identifier. */ if (idlen >= sizeof (u_int64_t)) { NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t)); cp += sizeof (u_int64_t); idlen -= sizeof (u_int64_t); } /* * If uuid is non-zero length, use it. */ uuidlen = strlen(uuid); if (uuidlen > 0 && idlen >= uuidlen) { NFSBCOPY(uuid, cp, uuidlen); cp += uuidlen; idlen -= uuidlen; } /* * This only normally happens if the uuid isn't set. */ while (idlen > 0) { *cp++ = (u_int8_t)(arc4random() % 256); idlen--; } } /* * Fill in a lock owner name. For now, pid + the process's creation time. */ void nfscl_filllockowner(void *id, u_int8_t *cp, int flags) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; if (id == NULL) { /* Return the single open_owner of all 0 bytes. 
*/ bzero(cp, NFSV4CL_LOCKNAMELEN); return; } if ((flags & F_POSIX) != 0) { p = (struct proc *)id; tl.lval = p->p_pid; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_sec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_usec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp = tl.cval[3]; } else if ((flags & F_FLOCK) != 0) { bcopy(&id, cp, sizeof(id)); bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id)); } else { printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n"); bzero(cp, NFSV4CL_LOCKNAMELEN); } } /* * Find the parent process for the thread passed in as an argument. * If none exists, return NULL, otherwise return a thread for the parent. * (Can be any of the threads, since it is only used for td->td_proc.) */ NFSPROC_T * nfscl_getparent(struct thread *td) { struct proc *p; struct thread *ptd; if (td == NULL) return (NULL); p = td->td_proc; if (p->p_pid == 0) return (NULL); p = p->p_pptr; if (p == NULL) return (NULL); ptd = TAILQ_FIRST(&p->p_threads); return (ptd); } /* * Start up the renew kernel thread. */ static void start_nfscl(void *arg) { struct nfsclclient *clp; struct thread *td; clp = (struct nfsclclient *)arg; td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads); nfscl_renewthread(clp, td); kproc_exit(0); } void nfscl_start_renewthread(struct nfsclclient *clp) { kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0, "nfscl"); } /* * Handle wcc_data. * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr * as the first Op after PutFH. * (For NFSv4, the postop attributes are after the Op, so they can't be * parsed here. A separate call to nfscl_postop_attr() is required.) */ int nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp, struct nfsvattr *nap, int *flagp, int *wccflagp, void *stuff) { u_int32_t *tl; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; int error = 0; if (wccflagp != NULL) *wccflagp = 0; if (nd->nd_flag & ND_NFSV3) { *flagp = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); if (wccflagp != NULL) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && np->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); mtx_unlock(&np->n_mtx); } } error = nfscl_postop_attr(nd, nap, flagp, stuff); if (wccflagp != NULL && *flagp == 0) *wccflagp = 0; } else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); if (error) return (error); /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl) nd->nd_flag |= ND_NOMOREDATA; if (wccflagp != NULL && nfsva.na_vattr.va_mtime.tv_sec != 0) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == nfsva.na_vattr.va_mtime.tv_sec && np->n_mtime.tv_nsec == nfsva.na_vattr.va_mtime.tv_sec); mtx_unlock(&np->n_mtx); } } nfsmout: return (error); } /* * Get postop attributes. 
*/ int nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp, void *stuff) { u_int32_t *tl; int error = 0; *retp = 0; if (nd->nd_flag & ND_NOMOREDATA) return (error); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); *retp = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { /* * For NFSv4, the postop attr are at the end, so no point * in looking if nd_repstat != 0. */ if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) /* should never happen since nd_repstat != 0 */ nd->nd_flag |= ND_NOMOREDATA; else *retp = 1; } } else if (!nd->nd_repstat) { /* For NFSv2, the attributes are here iff nd_repstat == 0 */ *retp = 1; } if (*retp) { error = nfsm_loadattr(nd, nap); if (error) *retp = 0; } nfsmout: return (error); } /* * nfscl_request() - mostly a wrapper for newnfs_request(). */ int nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, struct ucred *cred, void *stuff) { int ret, vers; struct nfsmount *nmp; nmp = VFSTONFS(vp->v_mount); if (nd->nd_flag & ND_NFSV4) vers = NFS_VER4; else if (nd->nd_flag & ND_NFSV3) vers = NFS_VER3; else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } /* * fill in this bsden's variant of statfs using nfsstatfs. */ void nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs) { struct statfs *sbp = (struct statfs *)statfs; if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE; sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE; /* * Although sf_abytes is uint64_t and f_bavail is int64_t, * the value after dividing by NFS_FABLKSIZE is small * enough that it will fit in 63bits, so it is ok to * assign it to f_bavail without fear that it will become * negative. */ sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE; sbp->f_files = sfp->sf_tfiles; /* Since f_ffree is int64_t, clip it to 63bits. */ if (sfp->sf_ffiles > INT64_MAX) sbp->f_ffree = INT64_MAX; else sbp->f_ffree = sfp->sf_ffiles; } else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) { /* * The type casts to (int32_t) ensure that this code is * compatible with the old NFS client, in that it will * propagate bit31 to the high order bits. This may or may * not be correct for NFSv2, but since it is a legacy * environment, I'd rather retain backwards compatibility. */ sbp->f_bsize = (int32_t)sfp->sf_bsize; sbp->f_blocks = (int32_t)sfp->sf_blocks; sbp->f_bfree = (int32_t)sfp->sf_bfree; sbp->f_bavail = (int32_t)sfp->sf_bavail; sbp->f_files = 0; sbp->f_ffree = 0; } } /* * Use the fsinfo stuff to update the mount point. 
*/ void nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) { if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) && fsp->fs_wtpref >= NFS_FABLKSIZE) nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) { nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = fsp->fs_wtmax; } if (nmp->nm_wsize < NFS_FABLKSIZE) nmp->nm_wsize = NFS_FABLKSIZE; if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) && fsp->fs_rtpref >= NFS_FABLKSIZE) nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) { nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = fsp->fs_rtmax; } if (nmp->nm_rsize < NFS_FABLKSIZE) nmp->nm_rsize = NFS_FABLKSIZE; if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize) && fsp->fs_dtpref >= NFS_DIRBLKSIZ) nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) { nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = fsp->fs_rtmax; } if (nmp->nm_readdirsize < NFS_DIRBLKSIZ) nmp->nm_readdirsize = NFS_DIRBLKSIZ; if (fsp->fs_maxfilesize > 0 && fsp->fs_maxfilesize < nmp->nm_maxfilesize) nmp->nm_maxfilesize = fsp->fs_maxfilesize; nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp); nmp->nm_state |= NFSSTA_GOTFSINFO; } /* * Lookups source address which should be used to communicate with * @nmp and stores it inside @pdst. * * Returns 0 on success. */ u_int8_t * nfscl_getmyip(struct nfsmount *nmp, struct in6_addr *paddr, int *isinet6p) { #if defined(INET6) || defined(INET) int error, fibnum; fibnum = curthread->td_proc->p_fibnum; #endif #ifdef INET if (nmp->nm_nam->sa_family == AF_INET) { struct sockaddr_in *sin; struct nhop4_extended nh_ext; sin = (struct sockaddr_in *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = fib4_lookup_nh_ext(fibnum, sin->sin_addr, 0, 0, &nh_ext); CURVNET_RESTORE(); if (error != 0) return (NULL); if (IN_LOOPBACK(ntohl(nh_ext.nh_src.s_addr))) { /* Ignore loopback addresses */ return (NULL); } *isinet6p = 0; *((struct in_addr *)paddr) = nh_ext.nh_src; return (u_int8_t *)paddr; } #endif #ifdef INET6 if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = in6_selectsrc_addr(fibnum, &sin6->sin6_addr, sin6->sin6_scope_id, NULL, paddr, NULL); CURVNET_RESTORE(); if (error != 0) return (NULL); if (IN6_IS_ADDR_LOOPBACK(paddr)) return (NULL); /* Scope is embedded in */ *isinet6p = 1; return (u_int8_t *)paddr; } #endif return (NULL); } /* * Copy NFS uid, gids from the cred structure. */ void newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) { int i; KASSERT(cr->cr_ngroups >= 0, ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); for (i = 0; i < nfscr->nfsc_ngroups; i++) nfscr->nfsc_groups[i] = cr->cr_groups[i]; } /* * Do any client specific initialization. */ void nfscl_init(void) { static int inited = 0; if (inited) return; inited = 1; nfscl_inited = 1; ncl_pbuf_zone = pbuf_zsecond_create("nfspbuf", nswbuf / 2); } /* * Check each of the attributes to be set, to ensure they aren't already * the correct value. 
Disable setting ones already correct. */ int nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap) { if (vap->va_mode != (mode_t)VNOVAL) { if (vap->va_mode == nvap->na_mode) vap->va_mode = (mode_t)VNOVAL; } if (vap->va_uid != (uid_t)VNOVAL) { if (vap->va_uid == nvap->na_uid) vap->va_uid = (uid_t)VNOVAL; } if (vap->va_gid != (gid_t)VNOVAL) { if (vap->va_gid == nvap->na_gid) vap->va_gid = (gid_t)VNOVAL; } if (vap->va_size != VNOVAL) { if (vap->va_size == nvap->na_size) vap->va_size = VNOVAL; } /* * We are normally called with only a partially initialized * VAP. Since the NFSv3 spec says that server may use the * file attributes to store the verifier, the spec requires * us to do a SETATTR RPC. FreeBSD servers store the verifier * in atime, but we can't really assume that all servers will * so we ensure that our SETATTR sets both atime and mtime. * Set the VA_UTIMES_NULL flag for this case, so that * the server's time will be used. This is needed to * work around a bug in some Solaris servers, where * setting the time TOCLIENT causes the Setattr RPC * to return NFS_OK, but not set va_mode. */ if (vap->va_mtime.tv_sec == VNOVAL) { vfs_timestamp(&vap->va_mtime); vap->va_vaflags |= VA_UTIMES_NULL; } if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; return (1); } /* * Map nfsv4 errors to errno.h errors. * The uid and gid arguments are only used for NFSERR_BADOWNER and that * error should only be returned for the Open, Create and Setattr Ops. * As such, most calls can just pass in 0 for those arguments. */ APPLESTATIC int nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) { struct proc *p; if (error < 10000 || error >= NFSERR_STALEWRITEVERF) return (error); if (td != NULL) p = td->td_proc; else p = NULL; switch (error) { case NFSERR_BADOWNER: tprintf(p, LOG_INFO, "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); case NFSERR_BADNAME: case NFSERR_BADCHAR: printf("nfsv4 char/name not handled by server\n"); return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: case NFSERR_SERVERFAULT: case NFSERR_BADTYPE: case NFSERR_FHEXPIRED: case NFSERR_RESOURCE: case NFSERR_MOVED: case NFSERR_NOFILEHANDLE: case NFSERR_MINORVERMISMATCH: case NFSERR_OLDSTATEID: case NFSERR_BADSEQID: case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); return (EIO); default: tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error); return (EIO); }; } /* * Check to see if the process for this owner exists. Return 1 if it doesn't * and 0 otherwise. */ int nfscl_procdoesntexist(u_int8_t *own) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; pid_t pid; int i, ret = 0; /* For the single open_owner of all 0 bytes, just return 0. 
*/ for (i = 0; i < NFSV4CL_LOCKNAMELEN; i++) if (own[i] != 0) break; if (i == NFSV4CL_LOCKNAMELEN) return (0); tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; pid = tl.lval; p = pfind_any_locked(pid); if (p == NULL) return (1); if (p->p_stats == NULL) { PROC_UNLOCK(p); return (0); } tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; if (tl.lval != p->p_stats->p_start.tv_sec) { ret = 1; } else { tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own; if (tl.lval != p->p_stats->p_start.tv_usec) ret = 1; } PROC_UNLOCK(p); return (ret); } /* * - nfs pseudo system call for the client */ /* * MPSAFE */ static int nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfscbd_args nfscbdarg; struct nfsd_nfscbd_args nfscbdarg2; struct nameidata nd; struct nfscl_dumpmntopts dumpmntopts; cap_rights_t rights; char *buf; int error; struct mount *mp; struct nfsmount *nmp; if (uap->flag & NFSSVC_CBADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg)); if (error) return (error); /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, nfscbdarg.sock, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); return (EPERM); } error = nfscbd_addsock(fp); fdrop(fp, td); if (!error && nfscl_enablecallb == 0) { nfsv4_cbport = nfscbdarg.port; nfscl_enablecallb = 1; } } else if (uap->flag & NFSSVC_NFSCBD) { if (uap->argp == NULL) return (EINVAL); error = copyin(uap->argp, (caddr_t)&nfscbdarg2, sizeof(nfscbdarg2)); if (error) return (error); error = nfscbd_nfsd(td, &nfscbdarg2); } else if (uap->flag & NFSSVC_DUMPMNTOPTS) { error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts)); if (error == 0 && (dumpmntopts.ndmnt_blen < 256 || dumpmntopts.ndmnt_blen > 1024)) error = EINVAL; if (error == 0) error = nfsrv_lookupfilename(&nd, dumpmntopts.ndmnt_fname, td); if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = EINVAL; } if (error == 0) { buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK); nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf, dumpmntopts.ndmnt_blen); vput(nd.ni_vp); error = copyout(buf, dumpmntopts.ndmnt_buf, dumpmntopts.ndmnt_blen); free(buf, M_TEMP); } } else if (uap->flag & NFSSVC_FORCEDISM) { buf = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK); error = copyinstr(uap->argp, buf, MNAMELEN + 1, NULL); if (error == 0) { nmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, buf) == 0 && strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 && mp->mnt_data != NULL) { nmp = VFSTONFS(mp); NFSDDSLOCK(); if (nfsv4_findmirror(nmp) != NULL) { NFSDDSUNLOCK(); error = ENXIO; nmp = NULL; break; } mtx_lock(&nmp->nm_mtx); if ((nmp->nm_privflag & NFSMNTP_FORCEDISM) == 0) { nmp->nm_privflag |= (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS); mtx_unlock(&nmp->nm_mtx); } else { mtx_unlock(&nmp->nm_mtx); nmp = NULL; } NFSDDSUNLOCK(); break; } } mtx_unlock(&mountlist_mtx); if (nmp != NULL) { /* * Call newnfs_nmcancelreqs() to cause * any RPCs in progress on the mount point to * fail. * This will cause any process waiting for an * RPC to complete while holding a vnode lock * on the mounted-on vnode (such as "df" or * a non-forced "umount") to fail. 
* This will unlock the mounted-on vnode so * a forced dismount can succeed. * Then clear NFSMNTP_CANCELRPCS and wakeup(), * so that nfs_unmount() can complete. */ newnfs_nmcancelreqs(nmp); mtx_lock(&nmp->nm_mtx); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); mtx_unlock(&nmp->nm_mtx); } else if (error == 0) error = EINVAL; } free(buf, M_TEMP); } else { error = EINVAL; } return (error); } extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscl_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) return (0); newnfs_portinit(); mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF); nfscl_init(); NFSD_LOCK(); nfsrvd_cbinit(0); NFSD_UNLOCK(); ncl_call_invalcaches = ncl_invalcaches; nfsd_call_nfscl = nfssvc_nfscl; loaded = 1; break; case MOD_UNLOAD: if (nfs_numnfscbd != 0) { error = EBUSY; break; } /* * XXX: Unloading of nfscl module is unsupported. */ #if 0 ncl_call_invalcaches = NULL; nfsd_call_nfscl = NULL; uma_zdestroy(ncl_pbuf_zone); /* and get rid of the mutexes */ mtx_destroy(&ncl_iod_mutex); loaded = 0; break; #else /* FALLTHROUGH */ #endif default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfscl_mod = { "nfscl", nfscl_modevent, NULL, }; DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscl, 1); MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1); MODULE_DEPEND(nfscl, krpc, 1, 1, 1); MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscl, nfslock, 1, 1, 1); Index: projects/clang900-import/sys/kern/kern_sysctl.c =================================================================== --- projects/clang900-import/sys/kern/kern_sysctl.c (revision 352536) +++ projects/clang900-import/sys/kern/kern_sysctl.c (revision 352537) @@ -1,2838 +1,2843 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef DDB #include #include #endif #include #include #include #include static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); /* * The sysctllock protects the MIB tree. It also protects sysctl * contexts used with dynamic sysctls. The sysctl_register_oid() and * sysctl_unregister_oid() routines require the sysctllock to already * be held, so the sysctl_wlock() and sysctl_wunlock() routines are * provided for the few places in the kernel which need to use that * API rather than using the dynamic API. Use of the dynamic API is * strongly encouraged for most code. * * The sysctlmemlock is used to limit the amount of user memory wired for * sysctl requests. This is implemented by serializing any userland * sysctl requests larger than a single page via an exclusive lock. */ static struct rmlock sysctllock; static struct sx __exclusive_cache_line sysctlmemlock; #define SYSCTL_WLOCK() rm_wlock(&sysctllock) #define SYSCTL_WUNLOCK() rm_wunlock(&sysctllock) #define SYSCTL_RLOCK(tracker) rm_rlock(&sysctllock, (tracker)) #define SYSCTL_RUNLOCK(tracker) rm_runlock(&sysctllock, (tracker)) #define SYSCTL_WLOCKED() rm_wowned(&sysctllock) #define SYSCTL_ASSERT_LOCKED() rm_assert(&sysctllock, RA_LOCKED) #define SYSCTL_ASSERT_WLOCKED() rm_assert(&sysctllock, RA_WLOCKED) #define SYSCTL_ASSERT_RLOCKED() rm_assert(&sysctllock, RA_RLOCKED) #define SYSCTL_INIT() rm_init_flags(&sysctllock, "sysctl lock", \ RM_SLEEPABLE) #define SYSCTL_SLEEP(ch, wmesg, timo) \ rm_sleep(ch, &sysctllock, 0, wmesg, timo) static int sysctl_root(SYSCTL_HANDLER_ARGS); /* Root list */ struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children); static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse); static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); static struct sysctl_oid * sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) { struct sysctl_oid *oidp; SYSCTL_ASSERT_LOCKED(); SLIST_FOREACH(oidp, list, oid_link) { if (strcmp(oidp->oid_name, name) == 0) { return (oidp); } } return (NULL); } /* * Initialization of the MIB tree. * * Order by number in each list. 
*/ void sysctl_wlock(void) { SYSCTL_WLOCK(); } void sysctl_wunlock(void) { SYSCTL_WUNLOCK(); } static int sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2, struct sysctl_req *req, struct rm_priotracker *tracker) { int error; if (oid->oid_kind & CTLFLAG_DYN) atomic_add_int(&oid->oid_running, 1); if (tracker != NULL) SYSCTL_RUNLOCK(tracker); else SYSCTL_WUNLOCK(); if (!(oid->oid_kind & CTLFLAG_MPSAFE)) mtx_lock(&Giant); error = oid->oid_handler(oid, arg1, arg2, req); if (!(oid->oid_kind & CTLFLAG_MPSAFE)) mtx_unlock(&Giant); KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); if (tracker != NULL) SYSCTL_RLOCK(tracker); else SYSCTL_WLOCK(); if (oid->oid_kind & CTLFLAG_DYN) { if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 && (oid->oid_kind & CTLFLAG_DYING) != 0) wakeup(&oid->oid_running); } return (error); } static void sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) { struct sysctl_req req; struct sysctl_oid *curr; char *penv = NULL; char path[96]; ssize_t rem = sizeof(path); ssize_t len; uint8_t data[512] __aligned(sizeof(uint64_t)); int size; int error; path[--rem] = 0; for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { len = strlen(curr->oid_name); rem -= len; if (curr != oidp) rem -= 1; if (rem < 0) { printf("OID path exceeds %d bytes\n", (int)sizeof(path)); return; } memcpy(path + rem, curr->oid_name, len); if (curr != oidp) path[rem + len] = '.'; } memset(&req, 0, sizeof(req)); req.td = curthread; req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; req.lock = REQ_UNWIRED; switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_INT: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_UINT: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_LONG: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(long), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_ULONG: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(long), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S8: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int8_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S16: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int16_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S32: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int32_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S64: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int64_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U8: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint8_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U16: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint16_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U32: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint32_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U64: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint64_t), GETENV_UNSIGNED) == 0) return; 
req.newlen = size; req.newptr = data; break; case CTLTYPE_STRING: penv = kern_getenv(path + rem); if (penv == NULL) return; req.newlen = strlen(penv); req.newptr = penv; break; default: return; } error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, oidp->oid_arg2, &req, NULL); if (error != 0) printf("Setting sysctl %s failed: %d\n", path + rem, error); if (penv != NULL) freeenv(penv); } /* * Locate the path to a given oid. Returns the length of the resulting path, * or -1 if the oid was not found. nodes must have room for CTL_MAXNAME * elements and be NULL initialized. */ static int sysctl_search_oid(struct sysctl_oid **nodes, struct sysctl_oid *needle) { int indx; SYSCTL_ASSERT_LOCKED(); indx = 0; while (indx < CTL_MAXNAME && indx >= 0) { if (nodes[indx] == NULL && indx == 0) nodes[indx] = SLIST_FIRST(&sysctl__children); else if (nodes[indx] == NULL) nodes[indx] = SLIST_FIRST(&nodes[indx - 1]->oid_children); else nodes[indx] = SLIST_NEXT(nodes[indx], oid_link); if (nodes[indx] == needle) return (indx + 1); if (nodes[indx] == NULL) { indx--; continue; } if ((nodes[indx]->oid_kind & CTLTYPE) == CTLTYPE_NODE) { indx++; continue; } } return (-1); } static void sysctl_warn_reuse(const char *func, struct sysctl_oid *leaf) { struct sysctl_oid *nodes[CTL_MAXNAME]; char buf[128]; struct sbuf sb; int rc, i; (void)sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_printf(&sb, "%s: can't re-use a leaf (", __func__); memset(nodes, 0, sizeof(nodes)); rc = sysctl_search_oid(nodes, leaf); if (rc > 0) { for (i = 0; i < rc; i++) sbuf_printf(&sb, "%s%.*s", nodes[i]->oid_name, i != (rc - 1), "."); } else { sbuf_printf(&sb, "%s", leaf->oid_name); } sbuf_printf(&sb, ")!\n"); (void)sbuf_finish(&sb); } #ifdef SYSCTL_DEBUG static int sysctl_reuse_test(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; SYSCTL_RLOCK(&tracker); sysctl_warn_reuse(__func__, oidp); SYSCTL_RUNLOCK(&tracker); return (0); } SYSCTL_PROC(_sysctl, 0, reuse_test, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_reuse_test, "-", ""); #endif void sysctl_register_oid(struct sysctl_oid *oidp) { struct sysctl_oid_list *parent = oidp->oid_parent; struct sysctl_oid *p; struct sysctl_oid *q; int oid_number; int timeout = 2; /* * First check if another oid with the same name already * exists in the parent's list. */ SYSCTL_ASSERT_WLOCKED(); p = sysctl_find_oidname(oidp->oid_name, parent); if (p != NULL) { if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { p->oid_refcnt++; return; } else { sysctl_warn_reuse(__func__, p); return; } } /* get current OID number */ oid_number = oidp->oid_number; #if (OID_AUTO >= 0) #error "OID_AUTO is expected to be a negative value" #endif /* * Any negative OID number qualifies as OID_AUTO. Valid OID * numbers should always be positive. * * NOTE: DO NOT change the starting value here, change it in * , and make sure it is at least 256 to * accommodate e.g. net.inet.raw as a static sysctl node. */ if (oid_number < 0) { static int newoid; /* * By decrementing the next OID number we spend less * time inserting the OIDs into a sorted list. */ if (--newoid < CTL_AUTO_START) newoid = 0x7fffffff; oid_number = newoid; } /* * Insert the OID into the parent's list sorted by OID number. 
*/ retry: q = NULL; SLIST_FOREACH(p, parent, oid_link) { /* check if the current OID number is in use */ if (oid_number == p->oid_number) { /* get the next valid OID number */ if (oid_number < CTL_AUTO_START || oid_number == 0x7fffffff) { /* wraparound - restart */ oid_number = CTL_AUTO_START; /* don't loop forever */ if (!timeout--) panic("sysctl: Out of OID numbers\n"); goto retry; } else { oid_number++; } } else if (oid_number < p->oid_number) break; q = p; } /* check for non-auto OID number collision */ if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START && oid_number >= CTL_AUTO_START) { printf("sysctl: OID number(%d) is already in use for '%s'\n", oidp->oid_number, oidp->oid_name); } /* update the OID number, if any */ oidp->oid_number = oid_number; if (q != NULL) SLIST_INSERT_AFTER(q, oidp, oid_link); else SLIST_INSERT_HEAD(parent, oidp, oid_link); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && #ifdef VIMAGE (oidp->oid_kind & CTLFLAG_VNET) == 0 && #endif (oidp->oid_kind & CTLFLAG_TUN) != 0 && (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { /* only fetch value once */ oidp->oid_kind |= CTLFLAG_NOFETCH; /* try to fetch value from kernel environment */ sysctl_load_tunable_by_oid_locked(oidp); } } void sysctl_register_disabled_oid(struct sysctl_oid *oidp) { /* * Mark the leaf as dormant if it's not to be immediately enabled. * We do not disable nodes as they can be shared between modules * and it is always safe to access a node. */ KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0, ("internal flag is set in oid_kind")); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) oidp->oid_kind |= CTLFLAG_DORMANT; sysctl_register_oid(oidp); } void sysctl_enable_oid(struct sysctl_oid *oidp) { SYSCTL_ASSERT_WLOCKED(); if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0, ("sysctl node is marked as dormant")); return; } KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) != 0, ("enabling already enabled sysctl oid")); oidp->oid_kind &= ~CTLFLAG_DORMANT; } void sysctl_unregister_oid(struct sysctl_oid *oidp) { struct sysctl_oid *p; int error; SYSCTL_ASSERT_WLOCKED(); if (oidp->oid_number == OID_AUTO) { error = EINVAL; } else { error = ENOENT; SLIST_FOREACH(p, oidp->oid_parent, oid_link) { if (p == oidp) { SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link); error = 0; break; } } } /* * This can happen when a module fails to register and is * being unloaded afterwards. It should not be a panic() * for normal use. */ if (error) { printf("%s: failed(%d) to unregister sysctl(%s)\n", __func__, error, oidp->oid_name); } } /* Initialize a new context to keep track of dynamically added sysctls. */ int sysctl_ctx_init(struct sysctl_ctx_list *c) { if (c == NULL) { return (EINVAL); } /* * No locking here, the caller is responsible for not adding * new nodes to a context until after this function has * returned. */ TAILQ_INIT(c); return (0); } /* Free the context, and destroy all dynamic oids registered in this context */ int sysctl_ctx_free(struct sysctl_ctx_list *clist) { struct sysctl_ctx_entry *e, *e1; int error; error = 0; /* * First perform a "dry run" to check if it's ok to remove oids. * XXX FIXME * XXX This algorithm is a hack. But I don't know any * XXX better solution for now... */ SYSCTL_WLOCK(); TAILQ_FOREACH(e, clist, link) { error = sysctl_remove_oid_locked(e->entry, 0, 0); if (error) break; } /* * Restore deregistered entries, either from the end, * or from the place where error occurred. 
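* (the dry run above deregisters each entry without freeing it, so everything taken out so far has to be re-registered before either bailing out with EBUSY or doing the real removal below).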
* e contains the entry that was not unregistered */ if (error) e1 = TAILQ_PREV(e, sysctl_ctx_list, link); else e1 = TAILQ_LAST(clist, sysctl_ctx_list); while (e1 != NULL) { sysctl_register_oid(e1->entry); e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); } if (error) { SYSCTL_WUNLOCK(); return(EBUSY); } /* Now really delete the entries */ e = TAILQ_FIRST(clist); while (e != NULL) { e1 = TAILQ_NEXT(e, link); error = sysctl_remove_oid_locked(e->entry, 1, 0); if (error) panic("sysctl_remove_oid: corrupt tree, entry: %s", e->entry->oid_name); free(e, M_SYSCTLOID); e = e1; } SYSCTL_WUNLOCK(); return (error); } /* Add an entry to the context */ struct sysctl_ctx_entry * sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; SYSCTL_ASSERT_WLOCKED(); if (clist == NULL || oidp == NULL) return(NULL); e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); e->entry = oidp; TAILQ_INSERT_HEAD(clist, e, link); return (e); } /* Find an entry in the context */ struct sysctl_ctx_entry * sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; SYSCTL_ASSERT_WLOCKED(); if (clist == NULL || oidp == NULL) return(NULL); TAILQ_FOREACH(e, clist, link) { if(e->entry == oidp) return(e); } return (e); } /* * Delete an entry from the context. * NOTE: this function doesn't free oidp! You have to remove it * with sysctl_remove_oid(). */ int sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; if (clist == NULL || oidp == NULL) return (EINVAL); SYSCTL_WLOCK(); e = sysctl_ctx_entry_find(clist, oidp); if (e != NULL) { TAILQ_REMOVE(clist, e, link); SYSCTL_WUNLOCK(); free(e, M_SYSCTLOID); return (0); } else { SYSCTL_WUNLOCK(); return (ENOENT); } } /* * Remove dynamically created sysctl trees. * oidp - top of the tree to be removed * del - if 0 - just deregister, otherwise free up entries as well * recurse - if != 0 traverse the subtree to be deleted */ int sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) { int error; SYSCTL_WLOCK(); error = sysctl_remove_oid_locked(oidp, del, recurse); SYSCTL_WUNLOCK(); return (error); } int sysctl_remove_name(struct sysctl_oid *parent, const char *name, int del, int recurse) { struct sysctl_oid *p, *tmp; int error; error = ENOENT; SYSCTL_WLOCK(); SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { if (strcmp(p->oid_name, name) == 0) { error = sysctl_remove_oid_locked(p, del, recurse); break; } } SYSCTL_WUNLOCK(); return (error); } static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) { struct sysctl_oid *p, *tmp; int error; SYSCTL_ASSERT_WLOCKED(); if (oidp == NULL) return(EINVAL); if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { printf("Warning: can't remove non-dynamic nodes (%s)!\n", oidp->oid_name); return (EINVAL); } /* * WARNING: normal method to do this should be through * sysctl_ctx_free(). Use recursing as the last resort * method to purge your sysctl tree of leftovers... * However, if some other code still references these nodes, * it will panic. 
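* An oid that is still shared (oid_refcnt > 1) is only dereferenced below; otherwise it is unregistered and, when del is set, we wait for any handler still running (oid_running) to drain before the oid and its strings are freed.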
*/ if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oidp->oid_refcnt == 1) { SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(oidp), oid_link, tmp) { if (!recurse) { printf("Warning: failed attempt to " "remove oid %s with child %s\n", oidp->oid_name, p->oid_name); return (ENOTEMPTY); } error = sysctl_remove_oid_locked(p, del, recurse); if (error) return (error); } } } if (oidp->oid_refcnt > 1 ) { oidp->oid_refcnt--; } else { if (oidp->oid_refcnt == 0) { printf("Warning: bad oid_refcnt=%u (%s)!\n", oidp->oid_refcnt, oidp->oid_name); return (EINVAL); } sysctl_unregister_oid(oidp); if (del) { /* * Wait for all threads running the handler to drain. * This preserves the previous behavior when the * sysctl lock was held across a handler invocation, * and is necessary for module unload correctness. */ while (oidp->oid_running > 0) { oidp->oid_kind |= CTLFLAG_DYING; SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); } if (oidp->oid_descr) free(__DECONST(char *, oidp->oid_descr), M_SYSCTLOID); if (oidp->oid_label) free(__DECONST(char *, oidp->oid_label), M_SYSCTLOID); free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); free(oidp, M_SYSCTLOID); } } return (0); } /* * Create new sysctls at run time. * clist may point to a valid context initialized with sysctl_ctx_init(). */ struct sysctl_oid * sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int number, const char *name, int kind, void *arg1, intmax_t arg2, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr, const char *label) { struct sysctl_oid *oidp; /* You have to hook up somewhere.. */ if (parent == NULL) return(NULL); /* Check if the node already exists, otherwise create it */ SYSCTL_WLOCK(); oidp = sysctl_find_oidname(name, parent); if (oidp != NULL) { if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { oidp->oid_refcnt++; /* Update the context */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); SYSCTL_WUNLOCK(); return (oidp); } else { sysctl_warn_reuse(__func__, oidp); SYSCTL_WUNLOCK(); return (NULL); } } oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); oidp->oid_parent = parent; SLIST_INIT(&oidp->oid_children); oidp->oid_number = number; oidp->oid_refcnt = 1; oidp->oid_name = strdup(name, M_SYSCTLOID); oidp->oid_handler = handler; oidp->oid_kind = CTLFLAG_DYN | kind; oidp->oid_arg1 = arg1; oidp->oid_arg2 = arg2; oidp->oid_fmt = fmt; if (descr != NULL) oidp->oid_descr = strdup(descr, M_SYSCTLOID); if (label != NULL) oidp->oid_label = strdup(label, M_SYSCTLOID); /* Update the context, if used */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); /* Register this oid */ sysctl_register_oid(oidp); SYSCTL_WUNLOCK(); return (oidp); } /* * Rename an existing oid. */ void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) { char *newname; char *oldname; newname = strdup(name, M_SYSCTLOID); SYSCTL_WLOCK(); oldname = __DECONST(char *, oidp->oid_name); oidp->oid_name = newname; SYSCTL_WUNLOCK(); free(oldname, M_SYSCTLOID); } /* * Reparent an existing oid. */ int sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) { struct sysctl_oid *oidp; SYSCTL_WLOCK(); if (oid->oid_parent == parent) { SYSCTL_WUNLOCK(); return (0); } oidp = sysctl_find_oidname(oid->oid_name, parent); if (oidp != NULL) { SYSCTL_WUNLOCK(); return (EEXIST); } sysctl_unregister_oid(oid); oid->oid_parent = parent; oid->oid_number = OID_AUTO; sysctl_register_oid(oid); SYSCTL_WUNLOCK(); return (0); } /* * Register the kernel's oids on startup. 
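* Every static SYSCTL_* declaration is collected into the sysctl_set linker set and registered in one pass by sysctl_register_all() at SYSINIT time.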
*/ SET_DECLARE(sysctl_set, struct sysctl_oid); static void sysctl_register_all(void *arg) { struct sysctl_oid **oidp; sx_init(&sysctlmemlock, "sysctl mem"); SYSCTL_INIT(); SYSCTL_WLOCK(); SET_FOREACH(oidp, sysctl_set) sysctl_register_oid(*oidp); SYSCTL_WUNLOCK(); } SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL); /* * "Staff-functions" * * These functions implement a presently undocumented interface * used by the sysctl program to walk the tree, and get the type * so it can print the value. * This interface is under work and consideration, and should probably * be killed with a big axe by the first person who can find the time. * (be aware though, that the proper interface isn't as obvious as it * may seem, there are various conflicting requirements. * - * {0,0} printf the entire MIB-tree. - * {0,1,...} return the name of the "..." OID. - * {0,2,...} return the next OID. - * {0,3} return the OID of the name in "new" - * {0,4,...} return the kind & format info for the "..." OID. - * {0,5,...} return the description of the "..." OID. - * {0,6,...} return the aggregation label of the "..." OID. + * {CTL_SYSCTL, CTL_SYSCTL_DEBUG} printf the entire MIB-tree. + * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...} return the name of the "..." + * OID. + * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...} return the next OID. + * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} return the OID of the name in + * "new" + * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...} return the kind & format info + * for the "..." OID. + * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...} return the description of the + * "..." OID. + * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...} return the aggregation label of + * the "..." OID. */ #ifdef SYSCTL_DEBUG static void sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) { int k; struct sysctl_oid *oidp; SYSCTL_ASSERT_LOCKED(); SLIST_FOREACH(oidp, l, oid_link) { for (k=0; k<i; k++) printf(" "); printf("%d %s ", oidp->oid_number, oidp->oid_name); printf("%c%c", oidp->oid_kind & CTLFLAG_RD ? 'R':' ', oidp->oid_kind & CTLFLAG_WR ?
'W':' '); if (oidp->oid_handler) printf(" *Handler"); switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_NODE: printf(" Node\n"); if (!oidp->oid_handler) { sysctl_sysctl_debug_dump_node( SYSCTL_CHILDREN(oidp), i + 2); } break; case CTLTYPE_INT: printf(" Int\n"); break; case CTLTYPE_UINT: printf(" u_int\n"); break; case CTLTYPE_LONG: printf(" Long\n"); break; case CTLTYPE_ULONG: printf(" u_long\n"); break; case CTLTYPE_STRING: printf(" String\n"); break; case CTLTYPE_S8: printf(" int8_t\n"); break; case CTLTYPE_S16: printf(" int16_t\n"); break; case CTLTYPE_S32: printf(" int32_t\n"); break; case CTLTYPE_S64: printf(" int64_t\n"); break; case CTLTYPE_U8: printf(" uint8_t\n"); break; case CTLTYPE_U16: printf(" uint16_t\n"); break; case CTLTYPE_U32: printf(" uint32_t\n"); break; case CTLTYPE_U64: printf(" uint64_t\n"); break; case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; default: printf("\n"); } } } static int sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; int error; error = priv_check(req->td, PRIV_SYSCTL_DEBUG); if (error) return (error); SYSCTL_RLOCK(&tracker); sysctl_sysctl_debug_dump_node(&sysctl__children, 0); SYSCTL_RUNLOCK(&tracker); return (ENOENT); } -SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, - 0, 0, sysctl_sysctl_debug, "-", ""); +SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD | + CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", ""); #endif static int sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int error = 0; struct sysctl_oid *oid; struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; struct rm_priotracker tracker; char buf[10]; SYSCTL_RLOCK(&tracker); while (namelen) { if (!lsp) { snprintf(buf,sizeof(buf),"%d",*name); if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, buf, strlen(buf)); if (error) goto out; namelen--; name++; continue; } lsp2 = NULL; SLIST_FOREACH(oid, lsp, oid_link) { if (oid->oid_number != *name) continue; if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, oid->oid_name, strlen(oid->oid_name)); if (error) goto out; namelen--; name++; if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) break; if (oid->oid_handler) break; lsp2 = SYSCTL_CHILDREN(oid); break; } lsp = lsp2; } error = SYSCTL_OUT(req, "", 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } /* * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in * capability mode. 
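* This node translates an OID vector back into its dotted name, which is what sysctl(8) relies on when printing entries it discovers by walking the tree.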
*/ -static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, - sysctl_sysctl_name, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, ""); static int sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, int *next, int *len, int level, struct sysctl_oid **oidpp) { struct sysctl_oid *oidp; SYSCTL_ASSERT_LOCKED(); *len = level; SLIST_FOREACH(oidp, lsp, oid_link) { *next = oidp->oid_number; *oidpp = oidp; if ((oidp->oid_kind & (CTLFLAG_SKIP | CTLFLAG_DORMANT)) != 0) continue; if (!namelen) { if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (0); if (oidp->oid_handler) /* We really should call the handler here...*/ return (0); lsp = SYSCTL_CHILDREN(oidp); if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, len, level+1, oidpp)) return (0); goto emptynode; } if (oidp->oid_number < *name) continue; if (oidp->oid_number > *name) { if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (0); if (oidp->oid_handler) return (0); lsp = SYSCTL_CHILDREN(oidp); if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, len, level+1, oidpp)) return (0); goto next; } if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) continue; if (oidp->oid_handler) continue; lsp = SYSCTL_CHILDREN(oidp); if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, len, level+1, oidpp)) return (0); next: namelen = 1; emptynode: *len = level; } return (1); } static int sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int i, j, error; struct sysctl_oid *oid; struct sysctl_oid_list *lsp = &sysctl__children; struct rm_priotracker tracker; int newoid[CTL_MAXNAME]; SYSCTL_RLOCK(&tracker); i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); SYSCTL_RUNLOCK(&tracker); if (i) return (ENOENT); error = SYSCTL_OUT(req, newoid, j * sizeof (int)); return (error); } /* * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in * capability mode. 
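* Returning the successor of a given OID is what makes a full walk of the MIB (sysctl -a style iteration) possible.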
*/ -static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, - sysctl_sysctl_next, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, ""); static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) { struct sysctl_oid *oidp; struct sysctl_oid_list *lsp = &sysctl__children; char *p; SYSCTL_ASSERT_LOCKED(); for (*len = 0; *len < CTL_MAXNAME;) { p = strsep(&name, "."); oidp = SLIST_FIRST(lsp); for (;; oidp = SLIST_NEXT(oidp, oid_link)) { if (oidp == NULL) return (ENOENT); if (strcmp(p, oidp->oid_name) == 0) break; } *oid++ = oidp->oid_number; (*len)++; if (name == NULL || *name == '\0') { if (oidpp) *oidpp = oidp; return (0); } if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) break; if (oidp->oid_handler) break; lsp = SYSCTL_CHILDREN(oidp); } return (ENOENT); } static int sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) { char *p; int error, oid[CTL_MAXNAME], len = 0; struct sysctl_oid *op = NULL; struct rm_priotracker tracker; char buf[32]; if (!req->newlen) return (ENOENT); if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ return (ENAMETOOLONG); p = buf; if (req->newlen >= sizeof(buf)) p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); error = SYSCTL_IN(req, p, req->newlen); if (error) { if (p != buf) free(p, M_SYSCTL); return (error); } p [req->newlen] = '\0'; SYSCTL_RLOCK(&tracker); error = name2oid(p, oid, &len, &op); SYSCTL_RUNLOCK(&tracker); if (p != buf) free(p, M_SYSCTL); if (error) return (error); error = SYSCTL_OUT(req, oid, len * sizeof *oid); return (error); } /* * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in * capability mode. */ -SYSCTL_PROC(_sysctl, 3, name2oid, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE - | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); +SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0, + sysctl_sysctl_name2oid, "I", ""); static int sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_fmt == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); if (error) goto out; error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } -static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, - sysctl_sysctl_oidfmt, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, ""); static int sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_descr == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } -static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, - sysctl_sysctl_oiddescr, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD | + CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, ""); static int sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; 
SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_label == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, oid->oid_label, strlen(oid->oid_label) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } -static SYSCTL_NODE(_sysctl, 6, oidlabel, - CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, ""); /* * Default "handler" functions. */ /* * Handle a bool. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_bool(SYSCTL_HANDLER_ARGS) { uint8_t temp; int error; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) temp = *(bool *)arg1 ? 1 : 0; else temp = arg2 ? 1 : 0; error = SYSCTL_OUT(req, &temp, sizeof(temp)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else { error = SYSCTL_IN(req, &temp, sizeof(temp)); if (!error) *(bool *)arg1 = temp ? 1 : 0; } return (error); } /* * Handle an int8_t, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_8(SYSCTL_HANDLER_ARGS) { int8_t tmpout; int error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int8_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(tmpout)); return (error); } /* * Handle an int16_t, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_16(SYSCTL_HANDLER_ARGS) { int16_t tmpout; int error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int16_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(tmpout)); return (error); } /* * Handle an int32_t, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_32(SYSCTL_HANDLER_ARGS) { int32_t tmpout; int error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int32_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(tmpout)); return (error); } /* * Handle an int, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_int(SYSCTL_HANDLER_ARGS) { int tmpout, error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(int)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(int)); return (error); } /* * Based on on sysctl_handle_int() convert milliseconds into ticks. * Note: this is used by TCP. 
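* The backing variable holds ticks: it is exported as ms = ticks * 1000 / hz and a new setting is converted back with ticks = ms * hz / 1000, rejecting values that would round down below one tick (e.g. with hz = 1000, a stored 200 ticks reads back as 200 ms).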
*/ int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) { int error, s, tt; tt = *(int *)arg1; s = (int)((int64_t)tt * 1000 / hz); error = sysctl_handle_int(oidp, &s, 0, req); if (error || !req->newptr) return (error); tt = (int)((int64_t)s * hz / 1000); if (tt < 1) return (EINVAL); *(int *)arg1 = tt; return (0); } /* * Handle a long, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_long(SYSCTL_HANDLER_ARGS) { int error = 0; long tmplong; #ifdef SCTL_MASK32 int tmpint; #endif /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmplong = *(long *)arg1; else tmplong = arg2; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { tmpint = tmplong; error = SYSCTL_OUT(req, &tmpint, sizeof(int)); } else #endif error = SYSCTL_OUT(req, &tmplong, sizeof(long)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; #ifdef SCTL_MASK32 else if (req->flags & SCTL_MASK32) { error = SYSCTL_IN(req, &tmpint, sizeof(int)); *(long *)arg1 = (long)tmpint; } #endif else error = SYSCTL_IN(req, arg1, sizeof(long)); return (error); } /* * Handle a 64 bit int, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_64(SYSCTL_HANDLER_ARGS) { int error = 0; uint64_t tmpout; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(uint64_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); return (error); } /* * Handle our generic '\0' terminated 'C' string. * Two cases: * a variable string: point arg1 at it, arg2 is max length. * a constant string: point arg1 at it, arg2 is zero. */ int sysctl_handle_string(SYSCTL_HANDLER_ARGS) { size_t outlen; int error = 0, ro_string = 0; /* * A zero-length buffer indicates a fixed size read-only * string. In ddb, don't worry about trying to make a malloced * snapshot. */ if (arg2 == 0 || kdb_active) { arg2 = strlen((char *)arg1) + 1; ro_string = 1; } if (req->oldptr != NULL) { char *tmparg; if (ro_string) { tmparg = arg1; } else { /* try to make a coherent snapshot of the string */ tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); memcpy(tmparg, arg1, arg2); } outlen = strnlen(tmparg, arg2 - 1) + 1; error = SYSCTL_OUT(req, tmparg, outlen); if (!ro_string) free(tmparg, M_SYSCTLTMP); } else { outlen = strnlen((char *)arg1, arg2 - 1) + 1; error = SYSCTL_OUT(req, NULL, outlen); } if (error || !req->newptr) return (error); if ((req->newlen - req->newidx) >= arg2) { error = EINVAL; } else { arg2 = (req->newlen - req->newidx); error = SYSCTL_IN(req, arg1, arg2); ((char *)arg1)[arg2] = '\0'; } return (error); } /* * Handle any kind of opaque data. * arg1 points to it, arg2 is the size. */ int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) { int error, tries; u_int generation; struct sysctl_req req2; /* * Attempt to get a coherent snapshot, by using the thread * pre-emption counter updated from within mi_switch() to * determine if we were pre-empted during a bcopy() or * copyout(). Make 3 attempts at doing this before giving up. * If we encounter an error, stop immediately. 
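* (td_generation is advanced in mi_switch(), so an unchanged value after the copy means the thread was never switched out and the snapshot is consistent.)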
*/ tries = 0; req2 = *req; retry: generation = curthread->td_generation; error = SYSCTL_OUT(req, arg1, arg2); if (error) return (error); tries++; if (generation != curthread->td_generation && tries < 3) { *req = req2; goto retry; } error = SYSCTL_IN(req, arg1, arg2); return (error); } /* * Based on on sysctl_handle_int() convert microseconds to a sbintime. */ int sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS) { int error; int64_t tt; sbintime_t sb; tt = *(int64_t *)arg1; sb = sbttous(tt); error = sysctl_handle_64(oidp, &sb, 0, req); if (error || !req->newptr) return (error); tt = ustosbt(sb); *(int64_t *)arg1 = tt; return (0); } /* * Based on on sysctl_handle_int() convert milliseconds to a sbintime. */ int sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS) { int error; int64_t tt; sbintime_t sb; tt = *(int64_t *)arg1; sb = sbttoms(tt); error = sysctl_handle_64(oidp, &sb, 0, req); if (error || !req->newptr) return (error); tt = mstosbt(sb); *(int64_t *)arg1 = tt; return (0); } /* * Convert seconds to a struct timeval. Intended for use with * intervals and thus does not permit negative seconds. */ int sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS) { struct timeval *tv; int error, secs; tv = arg1; secs = tv->tv_sec; error = sysctl_handle_int(oidp, &secs, 0, req); if (error || req->newptr == NULL) return (error); if (secs < 0) return (EINVAL); tv->tv_sec = secs; return (0); } /* * Transfer functions to/from kernel space. * XXX: rather untested at this point */ static int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) { size_t i = 0; if (req->oldptr) { i = l; if (req->oldlen <= req->oldidx) i = 0; else if (i > req->oldlen - req->oldidx) i = req->oldlen - req->oldidx; if (i > 0) bcopy(p, (char *)req->oldptr + req->oldidx, i); } req->oldidx += l; if (req->oldptr && i != l) return (ENOMEM); return (0); } static int sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) { if (!req->newptr) return (0); if (req->newlen - req->newidx < l) return (EINVAL); bcopy((const char *)req->newptr + req->newidx, p, l); req->newidx += l; return (0); } int kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) { int error = 0; struct sysctl_req req; bzero(&req, sizeof req); req.td = td; req.flags = flags; if (oldlenp) { req.oldlen = *oldlenp; } req.validlen = req.oldlen; if (old) { req.oldptr= old; } if (new != NULL) { req.newlen = newlen; req.newptr = new; } req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; req.lock = REQ_UNWIRED; error = sysctl_root(0, name, namelen, &req); if (req.lock == REQ_WIRED && req.validlen > 0) vsunlock(req.oldptr, req.validlen); if (error && error != ENOMEM) return (error); if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } int kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) { int oid[CTL_MAXNAME]; size_t oidlen, plen; int error; - oid[0] = 0; /* sysctl internal magic */ - oid[1] = 3; /* name2oid */ + oid[0] = CTL_SYSCTL; + oid[1] = CTL_SYSCTL_NAME2OID; oidlen = sizeof(oid); error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, strlen(name), &plen, flags); if (error) return (error); error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, new, newlen, retval, flags); return (error); } /* * Transfer function to/from user space. 
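* Old (output) data is pushed out with copyout(), or copyout_nofault() once the destination has been wired by sysctl_wire_old_buffer(); new (input) data is fetched with copyin().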
*/ static int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) { size_t i, len, origidx; int error; origidx = req->oldidx; req->oldidx += l; if (req->oldptr == NULL) return (0); /* * If we have not wired the user supplied buffer and we are currently * holding locks, drop a witness warning, as it's possible that * write operations to the user page can sleep. */ if (req->lock != REQ_WIRED) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "sysctl_old_user()"); i = l; len = req->validlen; if (len <= origidx) i = 0; else { if (i > len - origidx) i = len - origidx; if (req->lock == REQ_WIRED) { error = copyout_nofault(p, (char *)req->oldptr + origidx, i); } else error = copyout(p, (char *)req->oldptr + origidx, i); if (error != 0) return (error); } if (i < l) return (ENOMEM); return (0); } static int sysctl_new_user(struct sysctl_req *req, void *p, size_t l) { int error; if (!req->newptr) return (0); if (req->newlen - req->newidx < l) return (EINVAL); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "sysctl_new_user()"); error = copyin((const char *)req->newptr + req->newidx, p, l); req->newidx += l; return (error); } /* * Wire the user space destination buffer. If set to a value greater than * zero, the len parameter limits the maximum amount of wired memory. */ int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) { int ret; size_t wiredlen; wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; ret = 0; if (req->lock != REQ_WIRED && req->oldptr && req->oldfunc == sysctl_old_user) { if (wiredlen != 0) { ret = vslock(req->oldptr, wiredlen); if (ret != 0) { if (ret != ENOMEM) return (ret); wiredlen = 0; } } req->lock = REQ_WIRED; req->validlen = wiredlen; } return (0); } int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req) { struct sysctl_oid_list *lsp; struct sysctl_oid *oid; int indx; SYSCTL_ASSERT_LOCKED(); lsp = &sysctl__children; indx = 0; while (indx < CTL_MAXNAME) { SLIST_FOREACH(oid, lsp, oid_link) { if (oid->oid_number == name[indx]) break; } if (oid == NULL) return (ENOENT); indx++; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oid->oid_handler != NULL || indx == namelen) { *noid = oid; if (nindx != NULL) *nindx = indx; KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, ("%s found DYING node %p", __func__, oid)); return (0); } lsp = SYSCTL_CHILDREN(oid); } else if (indx == namelen) { if ((oid->oid_kind & CTLFLAG_DORMANT) != 0) return (ENOENT); *noid = oid; if (nindx != NULL) *nindx = indx; KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, ("%s found DYING node %p", __func__, oid)); return (0); } else { return (ENOTDIR); } } return (ENOENT); } /* * Traverse our tree, and find the right node, execute whatever it points * to, and return the resulting error code. */ static int sysctl_root(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error, indx, lvl; SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); if (error) goto out; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { /* * You can't call a sysctl when it's a node, but has * no handler. Inform the user that it's a node. * The indx may or may not be the same as namelen. */ if (oid->oid_handler == NULL) { error = EISDIR; goto out; } } /* Is this sysctl writable? 
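* A write is attempted only when new data was supplied; without CTLFLAG_WR it fails with EPERM before the handler is ever invoked.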
*/ if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) { error = EPERM; goto out; } KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); #ifdef CAPABILITY_MODE /* * If the process is in capability mode, then don't permit reading or * writing unless specifically granted for the node. */ if (IN_CAPABILITY_MODE(req->td)) { if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) || (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) { error = EPERM; goto out; } } #endif /* Is this sysctl sensitive to securelevels? */ if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; error = securelevel_gt(req->td->td_ucred, lvl); if (error) goto out; } /* Is this sysctl writable by only privileged users? */ if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { int priv; if (oid->oid_kind & CTLFLAG_PRISON) priv = PRIV_SYSCTL_WRITEJAIL; #ifdef VIMAGE else if ((oid->oid_kind & CTLFLAG_VNET) && prison_owns_vnet(req->td->td_ucred)) priv = PRIV_SYSCTL_WRITEJAIL; #endif else priv = PRIV_SYSCTL_WRITE; error = priv_check(req->td, priv); if (error) goto out; } if (!oid->oid_handler) { error = EINVAL; goto out; } if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { arg1 = (int *)arg1 + indx; arg2 -= indx; } else { arg1 = oid->oid_arg1; arg2 = oid->oid_arg2; } #ifdef MAC error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, req); if (error != 0) goto out; #endif #ifdef VIMAGE if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); #endif error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker); out: SYSCTL_RUNLOCK(&tracker); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sysctl_args { int *name; u_int namelen; void *old; size_t *oldlenp; void *new; size_t newlen; }; #endif int sys___sysctl(struct thread *td, struct sysctl_args *uap) { int error, i, name[CTL_MAXNAME]; size_t j; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, &name, uap->namelen * sizeof(int)); if (error) return (error); error = userland_sysctl(td, name, uap->namelen, uap->old, uap->oldlenp, 0, uap->new, uap->newlen, &j, 0); if (error && error != ENOMEM) return (error); if (uap->oldlenp) { i = copyout(&j, uap->oldlenp, sizeof(j)); if (i) return (i); } return (error); } int kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags, bool inkernel) { int oid[CTL_MAXNAME]; char namebuf[16]; char *name; size_t oidlen; int error; if (namelen > MAXPATHLEN || namelen == 0) return (EINVAL); name = namebuf; if (namelen > sizeof(namebuf)) name = malloc(namelen, M_SYSCTL, M_WAITOK); error = copyin(oname, name, namelen); if (error != 0) goto out; - oid[0] = 0; - oid[1] = 3; + oid[0] = CTL_SYSCTL; + oid[1] = CTL_SYSCTL_NAME2OID; oidlen = sizeof(oid); error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen, retval, flags); if (error != 0) goto out; error = userland_sysctl(td, oid, *retval / sizeof(int), old, oldlenp, inkernel, new, newlen, retval, flags); out: if (namelen > sizeof(namebuf)) free(name, M_SYSCTL); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __sysctlbyname_args { const char *name; size_t namelen; void *old; size_t *oldlenp; void *new; size_t newlen; }; #endif int sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap) { size_t rv; int error; error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old, uap->oldlenp, uap->new, 
uap->newlen, &rv, 0, 0); if (error != 0) return (error); if (uap->oldlenp != NULL) error = copyout(&rv, uap->oldlenp, sizeof(rv)); return (error); } /* * This is used from various compatibility syscalls too. That's why name * must be in kernel space. */ int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, const void *new, size_t newlen, size_t *retval, int flags) { int error = 0, memlocked; struct sysctl_req req; bzero(&req, sizeof req); req.td = td; req.flags = flags; if (oldlenp) { if (inkernel) { req.oldlen = *oldlenp; } else { error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); if (error) return (error); } } req.validlen = req.oldlen; req.oldptr = old; if (new != NULL) { req.newlen = newlen; req.newptr = new; } req.oldfunc = sysctl_old_user; req.newfunc = sysctl_new_user; req.lock = REQ_UNWIRED; #ifdef KTRACE if (KTRPOINT(curthread, KTR_SYSCTL)) ktrsysctl(name, namelen); #endif memlocked = 0; if (req.oldptr && req.oldlen > 4 * PAGE_SIZE) { memlocked = 1; sx_xlock(&sysctlmemlock); } CURVNET_SET(TD_TO_VNET(td)); for (;;) { req.oldidx = 0; req.newidx = 0; error = sysctl_root(0, name, namelen, &req); if (error != EAGAIN) break; kern_yield(PRI_USER); } CURVNET_RESTORE(); if (req.lock == REQ_WIRED && req.validlen > 0) vsunlock(req.oldptr, req.validlen); if (memlocked) sx_xunlock(&sysctlmemlock); if (error && error != ENOMEM) return (error); if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } /* * Drain into a sysctl struct. The user buffer should be wired if a page * fault would cause issue. */ static int sbuf_sysctl_drain(void *arg, const char *data, int len) { struct sysctl_req *req = arg; int error; error = SYSCTL_OUT(req, data, len); KASSERT(error >= 0, ("Got unexpected negative value %d", error)); return (error == 0 ? len : -error); } struct sbuf * sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, struct sysctl_req *req) { /* Supply a default buffer size if none given. 
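* The sbuf drains into the request through sbuf_sysctl_drain(), so this only sizes the staging buffer, not the amount of data that can be returned.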
*/ if (buf == NULL && length == 0) length = 64; s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_set_drain(s, sbuf_sysctl_drain, req); return (s); } #ifdef DDB /* The current OID the debugger is working with */ static struct sysctl_oid *g_ddb_oid; /* The current flags specified by the user */ static int g_ddb_sysctl_flags; /* Check to see if the last sysctl printed */ static int g_ddb_sysctl_printed; static const int ctl_sign[CTLTYPE+1] = { [CTLTYPE_INT] = 1, [CTLTYPE_LONG] = 1, [CTLTYPE_S8] = 1, [CTLTYPE_S16] = 1, [CTLTYPE_S32] = 1, [CTLTYPE_S64] = 1, }; static const int ctl_size[CTLTYPE+1] = { [CTLTYPE_INT] = sizeof(int), [CTLTYPE_UINT] = sizeof(u_int), [CTLTYPE_LONG] = sizeof(long), [CTLTYPE_ULONG] = sizeof(u_long), [CTLTYPE_S8] = sizeof(int8_t), [CTLTYPE_S16] = sizeof(int16_t), [CTLTYPE_S32] = sizeof(int32_t), [CTLTYPE_S64] = sizeof(int64_t), [CTLTYPE_U8] = sizeof(uint8_t), [CTLTYPE_U16] = sizeof(uint16_t), [CTLTYPE_U32] = sizeof(uint32_t), [CTLTYPE_U64] = sizeof(uint64_t), }; #define DB_SYSCTL_NAME_ONLY 0x001 /* Compare with -N */ #define DB_SYSCTL_VALUE_ONLY 0x002 /* Compare with -n */ #define DB_SYSCTL_OPAQUE 0x004 /* Compare with -o */ #define DB_SYSCTL_HEX 0x008 /* Compare with -x */ #define DB_SYSCTL_SAFE_ONLY 0x100 /* Only simple types */ static const char db_sysctl_modifs[] = { 'N', 'n', 'o', 'x', }; static const int db_sysctl_modif_values[] = { DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY, DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX, }; /* Handlers considered safe to print while recursing */ static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = { sysctl_handle_bool, sysctl_handle_8, sysctl_handle_16, sysctl_handle_32, sysctl_handle_64, sysctl_handle_int, sysctl_handle_long, sysctl_handle_string, sysctl_handle_opaque, }; /* * Use in place of sysctl_old_kernel to print sysctl values. 
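* Output is formatted according to the type recorded in g_ddb_oid, honouring the /x (hex) modifier stashed in g_ddb_sysctl_flags.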
* * Compare to the output handling in show_var from sbin/sysctl/sysctl.c */ static int sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len) { const u_char *val, *p; const char *sep1; size_t intlen, slen; uintmax_t umv; intmax_t mv; int sign, ctltype, hexlen, xflag, error; /* Suppress false-positive GCC uninitialized variable warnings */ mv = 0; umv = 0; slen = len; val = p = ptr; if (ptr == NULL) { error = 0; goto out; } /* We are going to print */ g_ddb_sysctl_printed = 1; xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX; ctltype = (g_ddb_oid->oid_kind & CTLTYPE); sign = ctl_sign[ctltype]; intlen = ctl_size[ctltype]; switch (ctltype) { case CTLTYPE_NODE: case CTLTYPE_STRING: db_printf("%.*s", (int) len, (const char *) p); error = 0; goto out; case CTLTYPE_INT: case CTLTYPE_UINT: case CTLTYPE_LONG: case CTLTYPE_ULONG: case CTLTYPE_S8: case CTLTYPE_S16: case CTLTYPE_S32: case CTLTYPE_S64: case CTLTYPE_U8: case CTLTYPE_U16: case CTLTYPE_U32: case CTLTYPE_U64: hexlen = 2 + (intlen * CHAR_BIT + 3) / 4; sep1 = ""; while (len >= intlen) { switch (ctltype) { case CTLTYPE_INT: case CTLTYPE_UINT: umv = *(const u_int *)p; mv = *(const int *)p; break; case CTLTYPE_LONG: case CTLTYPE_ULONG: umv = *(const u_long *)p; mv = *(const long *)p; break; case CTLTYPE_S8: case CTLTYPE_U8: umv = *(const uint8_t *)p; mv = *(const int8_t *)p; break; case CTLTYPE_S16: case CTLTYPE_U16: umv = *(const uint16_t *)p; mv = *(const int16_t *)p; break; case CTLTYPE_S32: case CTLTYPE_U32: umv = *(const uint32_t *)p; mv = *(const int32_t *)p; break; case CTLTYPE_S64: case CTLTYPE_U64: umv = *(const uint64_t *)p; mv = *(const int64_t *)p; break; } db_printf("%s", sep1); if (xflag) db_printf("%#0*jx", hexlen, umv); else if (!sign) db_printf("%ju", umv); else if (g_ddb_oid->oid_fmt[1] == 'K') { /* Kelvins are currently unsupported. */ error = EOPNOTSUPP; goto out; } else db_printf("%jd", mv); sep1 = " "; len -= intlen; p += intlen; } error = 0; goto out; case CTLTYPE_OPAQUE: /* TODO: Support struct functions. */ /* FALLTHROUGH */ default: db_printf("Format:%s Length:%zu Dump:0x", g_ddb_oid->oid_fmt, len); while (len-- && (xflag || p < val + 16)) db_printf("%02x", *p++); if (!xflag && len > 16) db_printf("..."); error = 0; goto out; } out: req->oldidx += slen; return (error); } /* * Avoid setting new sysctl values from the debugger */ static int sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l) { if (!req->newptr) return (0); /* Changing sysctls from the debugger is currently unsupported */ return (EPERM); } /* * Run a sysctl handler with the DDB oldfunc and newfunc attached. * Instead of copying any output to a buffer we'll dump it right to * the console. 
*/ static int db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen, void *old, size_t *oldlenp, size_t *retval, int flags) { struct sysctl_req req; int error; /* Setup the request */ bzero(&req, sizeof req); req.td = kdb_thread; req.oldfunc = sysctl_old_ddb; req.newfunc = sysctl_new_ddb; req.lock = REQ_UNWIRED; if (oldlenp) { req.oldlen = *oldlenp; } req.validlen = req.oldlen; if (old) { req.oldptr = old; } /* Setup our globals for sysctl_old_ddb */ g_ddb_oid = oidp; g_ddb_sysctl_flags = flags; g_ddb_sysctl_printed = 0; error = sysctl_root(0, name, namelen, &req); /* Reset globals */ g_ddb_oid = NULL; g_ddb_sysctl_flags = 0; if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } /* * Show a sysctl's name */ static void db_show_oid_name(int *oid, size_t nlen) { struct sysctl_oid *oidp; int qoid[CTL_MAXNAME+2]; int error; qoid[0] = 0; memcpy(qoid + 2, oid, nlen * sizeof(int)); qoid[1] = 1; error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL); if (error) db_error("sysctl name oid"); error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0); if (error) db_error("sysctl name"); } /* * Check to see if an OID is safe to print from ddb. */ static bool db_oid_safe(const struct sysctl_oid *oidp) { for (unsigned int i = 0; i < nitems(db_safe_handlers); ++i) { if (oidp->oid_handler == db_safe_handlers[i]) return (true); } return (false); } /* * Show a sysctl at a specific OID * Compare to the input handling in show_var from sbin/sysctl/sysctl.c */ static int db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags) { int error, xflag, oflag, Nflag, nflag; size_t len; xflag = flags & DB_SYSCTL_HEX; oflag = flags & DB_SYSCTL_OPAQUE; nflag = flags & DB_SYSCTL_VALUE_ONLY; Nflag = flags & DB_SYSCTL_NAME_ONLY; if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE && (!xflag && !oflag)) return (0); if (Nflag) { db_show_oid_name(oid, nlen); error = 0; goto out; } if (!nflag) { db_show_oid_name(oid, nlen); db_printf(": "); } if ((flags & DB_SYSCTL_SAFE_ONLY) && !db_oid_safe(oidp)) { db_printf("Skipping, unsafe to print while recursing."); error = 0; goto out; } /* Try once, and ask about the size */ len = 0; error = db_sysctl(oidp, oid, nlen, NULL, NULL, &len, flags); if (error) goto out; if (!g_ddb_sysctl_printed) /* Lie about the size */ error = db_sysctl(oidp, oid, nlen, (void *) 1, &len, NULL, flags); out: db_printf("\n"); return (error); } /* * Show all sysctls under a specific OID * Compare to sysctl_all from sbin/sysctl/sysctl.c */ static int db_show_sysctl_all(int *oid, size_t len, int flags) { struct sysctl_oid *oidp; int name1[CTL_MAXNAME + 2], name2[CTL_MAXNAME + 2]; size_t l1, l2; - name1[0] = 0; - name1[1] = 2; + name1[0] = CTL_SYSCTL; + name1[1] = CTL_SYSCTL_NEXT; l1 = 2; if (len) { memcpy(name1+2, oid, len * sizeof(int)); l1 +=len; } else { name1[2] = 1; l1++; } for (;;) { int i, error; l2 = sizeof(name2); error = kernel_sysctl(kdb_thread, name1, l1, name2, &l2, NULL, 0, &l2, 0); if (error != 0) { if (error == ENOENT) return (0); else db_error("sysctl(getnext)"); } l2 /= sizeof(int); if (l2 < (unsigned int)len) return (0); for (i = 0; i < len; i++) if (name2[i] != oid[i]) return (0); /* Find the OID in question */ error = sysctl_find_oid(name2, l2, &oidp, NULL, NULL); if (error) return (error); i = db_show_oid(oidp, name2, l2, flags | DB_SYSCTL_SAFE_ONLY); if (db_pager_quit) return (0); memcpy(name1+2, name2, l2 * sizeof(int)); l1 = 2 + l2; } } /* * Show a sysctl by its user facing string */ static 
int db_sysctlbyname(char *name, int flags) { struct sysctl_oid *oidp; int oid[CTL_MAXNAME]; int error, nlen; error = name2oid(name, oid, &nlen, &oidp); if (error) { return (error); } if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { db_show_sysctl_all(oid, nlen, flags); } else { error = db_show_oid(oidp, oid, nlen, flags); } return (error); } static void db_sysctl_cmd_usage(void) { db_printf( " sysctl [/Nnox] \n" " \n" " The name of the sysctl to show. \n" " \n" " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT. \n" " This will work for most sysctls, but should not be used \n" " with sysctls that are known to malloc. \n" " \n" " While recursing any \"unsafe\" sysctls will be skipped. \n" " Call sysctl directly on the sysctl to try printing the \n" " skipped sysctl. This is unsafe and may make the ddb \n" " session unusable. \n" " \n" " Arguments: \n" " /N Display only the name of the sysctl. \n" " /n Display only the value of the sysctl. \n" " /o Display opaque values. \n" " /x Display the sysctl in hex. \n" " \n" "For example: \n" "sysctl vm.v_free_min \n" "vn.v_free_min: 12669 \n" ); } /* * Show a specific sysctl similar to sysctl (8). */ DB_FUNC(sysctl, db_sysctl_cmd, db_cmd_table, CS_OWN, NULL) { char name[TOK_STRING_SIZE]; int error, i, t, flags; /* Parse the modifiers */ t = db_read_token(); if (t == tSLASH || t == tMINUS) { t = db_read_token(); if (t != tIDENT) { db_printf("Bad modifier\n"); error = EINVAL; goto out; } db_strcpy(modif, db_tok_string); } else { db_unread_token(t); modif[0] = '\0'; } flags = 0; for (i = 0; i < nitems(db_sysctl_modifs); i++) { if (strchr(modif, db_sysctl_modifs[i])) { flags |= db_sysctl_modif_values[i]; } } /* Parse the sysctl names */ t = db_read_token(); if (t != tIDENT) { db_printf("Need sysctl name\n"); error = EINVAL; goto out; } /* Copy the name into a temporary buffer */ db_strcpy(name, db_tok_string); /* Ensure there is no trailing cruft */ t = db_read_token(); if (t != tEOL) { db_printf("Unexpected sysctl argument\n"); error = EINVAL; goto out; } error = db_sysctlbyname(name, flags); if (error == ENOENT) { db_printf("unknown oid: '%s'\n", db_tok_string); goto out; } else if (error) { db_printf("%s: error: %d\n", db_tok_string, error); goto out; } out: /* Ensure we eat all of our text */ db_flush_lex(); if (error == EINVAL) { db_sysctl_cmd_usage(); } } #endif /* DDB */ Index: projects/clang900-import/sys/kern/vfs_cluster.c =================================================================== --- projects/clang900-import/sys/kern/vfs_cluster.c (revision 352536) +++ projects/clang900-import/sys/kern/vfs_cluster.c (revision 352537) @@ -1,1079 +1,1097 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * Modifications/enhancements: * Copyright (c) 1995 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_debug_cluster.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(CLUSTERDEBUG) static int rcluster= 0; SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, "Debug VFS clustering code"); #endif static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer"); static uma_zone_t cluster_pbuf_zone; static void cluster_init(void *); static struct cluster_save *cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags); static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, struct buf *fbp); static void cluster_callback(struct buf *); static int write_behind = 1; SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, "Cluster write-behind; 0: disable, 1: enable, 2: backed off"); static int read_max = 64; SYSCTL_INT(_vfs, OID_AUTO, read_max, CTLFLAG_RW, &read_max, 0, "Cluster read-ahead max block count"); static int read_min = 1; SYSCTL_INT(_vfs, OID_AUTO, read_min, CTLFLAG_RW, &read_min, 0, "Cluster read min block count"); SYSINIT(cluster, SI_SUB_CPU, SI_ORDER_ANY, cluster_init, NULL); static void cluster_init(void *dummy) { cluster_pbuf_zone = pbuf_zsecond_create("clpbuf", nswbuf / 2); } /* * Read data to a buf, including read-ahead if we find this to be beneficial. * cluster_read replaces bread. */ int cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size, struct ucred *cred, long totread, int seqcount, int gbflags, struct buf **bpp) { struct buf *bp, *rbp, *reqbp; struct bufobj *bo; struct thread *td; daddr_t blkno, origblkno; int maxra, racluster; int error, ncontig; int i; error = 0; td = curthread; bo = &vp->v_bufobj; if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; /* * Try to limit the amount of read-ahead by a few * ad-hoc parameters. This needs work!!! */ racluster = vp->v_mount->mnt_iosize_max / size; maxra = seqcount; maxra = min(read_max, maxra); maxra = min(nbuf/8, maxra); if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize) maxra = (filesize / size) - lblkno; /* * get the requested block */ error = getblkx(vp, lblkno, size, 0, 0, gbflags, &bp); if (error != 0) { *bpp = NULL; return (error); } gbflags &= ~GB_NOSPARSE; origblkno = lblkno; *bpp = reqbp = bp; /* * if it is in the cache, then check to see if the reads have been * sequential. 
If they have, then try some read-ahead, otherwise * back-off on prospective read-aheads. */ if (bp->b_flags & B_CACHE) { if (!seqcount) { return 0; } else if ((bp->b_flags & B_RAM) == 0) { return 0; } else { bp->b_flags &= ~B_RAM; BO_RLOCK(bo); for (i = 1; i < maxra; i++) { /* * Stop if the buffer does not exist or it * is invalid (about to go away?) */ rbp = gbincore(&vp->v_bufobj, lblkno+i); if (rbp == NULL || (rbp->b_flags & B_INVAL)) break; /* * Set another read-ahead mark so we know * to check again. (If we can lock the * buffer without waiting) */ if ((((i % racluster) == (racluster - 1)) || (i == (maxra - 1))) && (0 == BUF_LOCK(rbp, LK_EXCLUSIVE | LK_NOWAIT, NULL))) { rbp->b_flags |= B_RAM; BUF_UNLOCK(rbp); } } BO_RUNLOCK(bo); if (i >= maxra) { return 0; } lblkno += i; } reqbp = bp = NULL; /* * If it isn't in the cache, then get a chunk from * disk if sequential, otherwise just get the block. */ } else { off_t firstread = bp->b_offset; int nblks; long minread; KASSERT(bp->b_offset != NOOFFSET, ("cluster_read: no buffer offset")); ncontig = 0; /* * Adjust totread if needed */ minread = read_min * size; if (minread > totread) totread = minread; /* * Compute the total number of blocks that we should read * synchronously. */ if (firstread + totread > filesize) totread = filesize - firstread; nblks = howmany(totread, size); if (nblks > racluster) nblks = racluster; /* * Now compute the number of contiguous blocks. */ if (nblks > 1) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); /* * If this failed to map just do the original block. */ if (error || blkno == -1) ncontig = 0; } /* * If we have contiguous data available do a cluster * otherwise just read the requested block. */ if (ncontig) { /* Account for our first block. */ ncontig = min(ncontig + 1, nblks); if (ncontig < nblks) nblks = ncontig; bp = cluster_rbuild(vp, filesize, lblkno, blkno, size, nblks, gbflags, bp); lblkno += (bp->b_bufsize / size); } else { bp->b_flags |= B_RAM; bp->b_iocmd = BIO_READ; lblkno += 1; } } /* * handle the synchronous read so that it is available ASAP. */ if (bp) { if ((bp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(bp, 0); } bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL) BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); racct_add_buf(td->td_proc, bp, 0); PROC_UNLOCK(td->td_proc); } #endif /* RACCT */ td->td_ru.ru_inblock++; } /* * If we have been doing sequential I/O, then do some read-ahead. */ while (lblkno < (origblkno + maxra)) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); if (error) break; if (blkno == -1) break; /* * We could throttle ncontig here by maxra but we might as * well read the data if it is contiguous. We're throttled * by racluster anyway. 
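* Each pass either builds another cluster with cluster_rbuild() or falls back to a plain asynchronous single-block read; buffers that turn out to be cached or dirty (B_DELWRI) are simply released again.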
*/ if (ncontig) { ncontig = min(ncontig + 1, racluster); rbp = cluster_rbuild(vp, filesize, lblkno, blkno, size, ncontig, gbflags, NULL); lblkno += (rbp->b_bufsize / size); if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } } else { rbp = getblk(vp, lblkno, size, 0, 0, gbflags); lblkno += 1; if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } rbp->b_flags |= B_ASYNC | B_RAM; rbp->b_iocmd = BIO_READ; rbp->b_blkno = blkno; } if (rbp->b_flags & B_CACHE) { rbp->b_flags &= ~B_ASYNC; bqrelse(rbp); continue; } if ((rbp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(rbp, 0); } rbp->b_flags &= ~B_INVAL; rbp->b_ioflags &= ~BIO_ERROR; if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL) BUF_KERNPROC(rbp); rbp->b_iooffset = dbtob(rbp->b_blkno); bstrategy(rbp); #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); racct_add_buf(td->td_proc, rbp, 0); PROC_UNLOCK(td->td_proc); } #endif /* RACCT */ td->td_ru.ru_inblock++; } if (reqbp) { /* * Like bread, always brelse() the buffer when * returning an error. */ error = bufwait(reqbp); if (error != 0) { brelse(reqbp); *bpp = NULL; } } return (error); } /* * If blocks are contiguous on disk, use this to provide clustered * read ahead. We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. */ static struct buf * cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, struct buf *fbp) { struct buf *bp, *tbp; daddr_t bn; off_t off; long tinc, tsize; int i, inc, j, k, toff; KASSERT(size == vp->v_mount->mnt_stat.f_iosize, ("cluster_rbuild: size %ld != f_iosize %jd\n", size, (intmax_t)vp->v_mount->mnt_stat.f_iosize)); /* * avoid a division */ while ((u_quad_t) size * (lbn + run) > filesize) { --run; } if (fbp) { tbp = fbp; tbp->b_iocmd = BIO_READ; } else { tbp = getblk(vp, lbn, size, 0, 0, gbflags); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_RAM; tbp->b_iocmd = BIO_READ; } tbp->b_blkno = blkno; if( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) return tbp; bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT); if (bp == NULL) return tbp; /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. */ bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO; if ((gbflags & GB_UNMAPPED) != 0) { bp->b_data = unmapped_buf; } else { bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); } bp->b_iocmd = BIO_READ; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; bp->b_lblkno = lbn; bp->b_offset = tbp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset")); pbgetvp(vp, bp); TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; inc = btodb(size); for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i == 0) { VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); vfs_drain_busy_pages(tbp); vm_object_pip_add(tbp->b_bufobj->bo_object, tbp->b_npages); for (k = 0; k < tbp->b_npages; k++) vm_page_sbusy(tbp->b_pages[k]); VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); } else { if ((bp->b_npages * PAGE_SIZE) + round_page(size) > vp->v_mount->mnt_iosize_max) { break; } tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT | (gbflags & GB_UNMAPPED)); /* Don't wait around for locked bufs. 
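 * (GB_LOCK_NOWAIT makes getblk() return NULL here instead of sleeping
 * on a locked buffer.)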
*/ if (tbp == NULL) break; /* * Stop scanning if the buffer is fully valid * (marked B_CACHE), or locked (may be doing a * background write), or if the buffer is not * VMIO backed. The clustering code can only deal * with VMIO-backed buffers. The bo lock is not * required for the BKGRDINPROG check since it * can not be set without the buf lock. */ if ((tbp->b_vflags & BV_BKGRDINPROG) || (tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { bqrelse(tbp); break; } /* * The buffer must be completely invalid in order to * take part in the cluster. If it is partially valid * then we stop. */ off = tbp->b_offset; tsize = size; VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); for (j = 0; tsize > 0; j++) { toff = off & PAGE_MASK; tinc = tsize; if (toff + tinc > PAGE_SIZE) tinc = PAGE_SIZE - toff; VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object); if ((tbp->b_pages[j]->valid & vm_page_bits(toff, tinc)) != 0) break; if (vm_page_xbusied(tbp->b_pages[j])) break; vm_object_pip_add(tbp->b_bufobj->bo_object, 1); vm_page_sbusy(tbp->b_pages[j]); off += tinc; tsize -= tinc; } if (tsize > 0) { clean_sbusy: vm_object_pip_wakeupn(tbp->b_bufobj->bo_object, j); for (k = 0; k < j; k++) vm_page_sunbusy(tbp->b_pages[k]); VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); bqrelse(tbp); break; } VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); /* * Set a read-ahead mark as appropriate */ if ((fbp && (i == 1)) || (i == (run - 1))) tbp->b_flags |= B_RAM; /* * Set the buffer up for an async read (XXX should * we do this only if we do not wind up brelse()ing?). * Set the block number if it isn't set, otherwise * if it is make sure it matches the block number we * expect. */ tbp->b_flags |= B_ASYNC; tbp->b_iocmd = BIO_READ; if (tbp->b_blkno == tbp->b_lblkno) { tbp->b_blkno = bn; } else if (tbp->b_blkno != bn) { VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); goto clean_sbusy; } } /* * XXX fbp from caller may not be B_ASYNC, but we are going * to biodone() it in cluster_callback() anyway */ BUF_KERNPROC(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } if (m->valid == VM_PAGE_BITS_ALL) tbp->b_pages[j] = bogus_page; } VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); /* * Don't inherit tbp->b_bufsize as it may be larger due to * a non-page-aligned size. Instead just aggregate using * 'size'. */ if (tbp->b_bcount != size) printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size); if (tbp->b_bufsize != size) printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size); bp->b_bcount += size; bp->b_bufsize += size; } /* * Fully valid pages in the cluster are already good and do not need * to be re-read from disk. Replace the page with bogus_page */ VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (j = 0; j < bp->b_npages; j++) { VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object); if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL) bp->b_pages[j] = bogus_page; } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); if (bp->b_bufsize > bp->b_kvasize) panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); if (buf_mapped(bp)) { pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); } return (bp); } /* * Cleanup after a clustered read or write. 
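 * cluster_callback is installed as b_iodone on the pbuf synthesized by
 * cluster_rbuild()/cluster_wbuild(), so it runs when the cluster I/O
 * completes.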
* This is complicated by the fact that any of the buffers might have * extra memory (if there were no empty buffer headers at allocbuf time) * that we will need to shift around. */ static void cluster_callback(struct buf *bp) { struct buf *nbp, *tbp; int error = 0; /* * Must propagate errors to all the components. */ if (bp->b_ioflags & BIO_ERROR) error = bp->b_error; if (buf_mapped(bp)) { pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); } /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. */ for (tbp = TAILQ_FIRST(&bp->b_cluster.cluster_head); tbp; tbp = nbp) { nbp = TAILQ_NEXT(&tbp->b_cluster, cluster_entry); if (error) { tbp->b_ioflags |= BIO_ERROR; tbp->b_error = error; } else { tbp->b_dirtyoff = tbp->b_dirtyend = 0; tbp->b_flags &= ~B_INVAL; tbp->b_ioflags &= ~BIO_ERROR; /* * XXX the bdwrite()/bqrelse() issued during * cluster building clears B_RELBUF (see bqrelse() * comment). If direct I/O was specified, we have * to restore it here to allow the buffer and VM * to be freed. */ if (tbp->b_flags & B_DIRECT) tbp->b_flags |= B_RELBUF; } bufdone(tbp); } pbrelvp(bp); uma_zfree(cluster_pbuf_zone, bp); } /* * cluster_wbuild_wb: * * Implement modified write build for cluster. * * write_behind = 0 write behind disabled * write_behind = 1 write behind normal (default) * write_behind = 2 write behind backed-off */ static __inline int cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len, int gbflags) { int r = 0; switch (write_behind) { case 2: if (start_lbn < len) break; start_lbn -= len; /* FALLTHROUGH */ case 1: r = cluster_wbuild(vp, size, start_lbn, len, gbflags); /* FALLTHROUGH */ default: /* FALLTHROUGH */ break; } return(r); } /* * Do clustered write for FFS. * * Three cases: * 1. Write is not sequential (write asynchronously) * Write is sequential: * 2. beginning of cluster - begin cluster * 3. middle of a cluster - add to cluster * 4. end of a cluster - asynchronously write cluster */ void cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount, int gbflags) { daddr_t lbn; int maxclen, cursize; int lblocksize; int async; if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; if (vp->v_type == VREG) { async = DOINGASYNC(vp); lblocksize = vp->v_mount->mnt_stat.f_iosize; } else { async = 0; lblocksize = bp->b_bufsize; } lbn = bp->b_lblkno; KASSERT(bp->b_offset != NOOFFSET, ("cluster_write: no buffer offset")); /* Initialize vnode to beginning of file. */ if (lbn == 0) vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) { maxclen = vp->v_mount->mnt_iosize_max / lblocksize - 1; if (vp->v_clen != 0) { /* * Next block is not sequential. * * If we are not writing at end of file, the process * seeked to another point in the file since its last * write, or we have reached our maximum cluster size, * then push the previous cluster. Otherwise try * reallocating to make it sequential. * * Change to algorithm: only push previous cluster if * it was sequential from the point of view of the * seqcount heuristic, otherwise leave the buffer * intact so we can potentially optimize the I/O * later on in the buf_daemon or update daemon * flush. 
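 * cursize, computed below, is the length in logical blocks of the
 * cluster accumulated so far (v_cstart through v_lastw).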
*/ cursize = vp->v_lastw - vp->v_cstart + 1; if (((u_quad_t) bp->b_offset + lblocksize) != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { if (!async && seqcount > 0) { cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, cursize, gbflags); } } else { struct buf **bpp, **endbp; struct cluster_save *buflist; buflist = cluster_collectbufs(vp, bp, gbflags); + if (buflist == NULL) { + /* + * Cluster build failed so just write + * it now. + */ + bawrite(bp); + return; + } endbp = &buflist->bs_children [buflist->bs_nchildren - 1]; if (VOP_REALLOCBLKS(vp, buflist)) { /* * Failed, push the previous cluster * if *really* writing sequentially * in the logical file (seqcount > 1), * otherwise delay it in the hopes that * the low level disk driver can * optimize the write ordering. */ for (bpp = buflist->bs_children; bpp < endbp; bpp++) brelse(*bpp); free(buflist, M_SEGMENT); if (seqcount > 1) { cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, cursize, gbflags); } } else { /* * Succeeded, keep building cluster. */ for (bpp = buflist->bs_children; bpp <= endbp; bpp++) bdwrite(*bpp); free(buflist, M_SEGMENT); vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; return; } } } /* * Consider beginning a cluster. If at end of file, make * cluster as large as possible, otherwise find size of * existing cluster. */ if ((vp->v_type == VREG) && ((u_quad_t) bp->b_offset + lblocksize) != filesize && (bp->b_blkno == bp->b_lblkno) && (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) || bp->b_blkno == -1)) { bawrite(bp); vp->v_clen = 0; vp->v_lasta = bp->b_blkno; vp->v_cstart = lbn + 1; vp->v_lastw = lbn; return; } vp->v_clen = maxclen; if (!async && maxclen == 0) { /* I/O not contiguous */ vp->v_cstart = lbn + 1; bawrite(bp); } else { /* Wait for rest of cluster */ vp->v_cstart = lbn; bdwrite(bp); } } else if (lbn == vp->v_cstart + vp->v_clen) { /* * At end of cluster, write it out if seqcount tells us we * are operating sequentially, otherwise let the buf or * update daemon handle it. */ bdwrite(bp); if (seqcount > 1) { cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1, gbflags); } vp->v_clen = 0; vp->v_cstart = lbn + 1; } else if (vm_page_count_severe()) { /* * We are low on memory, get it going NOW */ bawrite(bp); } else { /* * In the middle of a cluster, so just delay the I/O for now. */ bdwrite(bp); } vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; } /* * This is an awful lot like cluster_rbuild...wish they could be combined. * The last lbn argument is the current block on which I/O is being * performed. Check to see that it doesn't fall in the middle of * the current block (if last_bp == NULL). */ int cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, int gbflags) { struct buf *bp, *tbp; struct bufobj *bo; int i, j; int totalwritten = 0; int dbsize = btodb(size); if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; bo = &vp->v_bufobj; while (len > 0) { /* * If the buffer is not delayed-write (i.e. dirty), or it * is delayed-write but either locked or inval, it cannot * partake in the clustered write. */ BO_LOCK(bo); if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { BO_UNLOCK(bo); ++start_lbn; --len; continue; } if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, BO_LOCKPTR(bo))) { ++start_lbn; --len; continue; } if ((tbp->b_flags & (B_INVAL | B_DELWRI)) != B_DELWRI) { BUF_UNLOCK(tbp); ++start_lbn; --len; continue; } bremfree(tbp); tbp->b_flags &= ~B_DONE; /* * Extra memory in the buffer, punt on this buffer. 
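 * ("Extra memory" means b_bufsize differs from b_bcount; that mismatch
 * is one of the checks below.)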
* XXX we could handle this in most cases, but we would * have to push the extra memory down to after our max * possible cluster size and then potentially pull it back * up if the cluster was terminated prematurely--too much * hassle. */ if (((tbp->b_flags & (B_CLUSTEROK | B_MALLOC | B_VMIO)) != (B_CLUSTEROK | B_VMIO)) || (tbp->b_bcount != tbp->b_bufsize) || (tbp->b_bcount != size) || (len == 1) || ((bp = uma_zalloc(cluster_pbuf_zone, (vp->v_vflag & VV_MD) != 0 ? M_NOWAIT : M_WAITOK)) == NULL)) { totalwritten += tbp->b_bufsize; bawrite(tbp); ++start_lbn; --len; continue; } /* * We got a pbuf to make the cluster in. * so initialise it. */ TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; if (tbp->b_wcred != NOCRED) bp->b_wcred = crhold(tbp->b_wcred); bp->b_blkno = tbp->b_blkno; bp->b_lblkno = tbp->b_lblkno; bp->b_offset = tbp->b_offset; /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. */ if ((gbflags & GB_UNMAPPED) == 0 || (tbp->b_flags & B_VMIO) == 0) { bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); } else { bp->b_data = unmapped_buf; } bp->b_flags |= B_CLUSTER | (tbp->b_flags & (B_VMIO | B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); /* * From this location in the file, scan forward to see * if there are buffers with adjacent data that need to * be written as well. */ for (i = 0; i < len; ++i, ++start_lbn) { if (i != 0) { /* If not the first buffer */ /* * If the adjacent data is not even in core it * can't need to be written. */ BO_LOCK(bo); if ((tbp = gbincore(bo, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { BO_UNLOCK(bo); break; } /* * If it IS in core, but has different * characteristics, or is locked (which * means it could be undergoing a background * I/O or be in a weird state), then don't * cluster with it. */ if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, BO_LOCKPTR(bo))) break; if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK | B_INVAL | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_CLUSTEROK | (bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) || tbp->b_wcred != bp->b_wcred) { BUF_UNLOCK(tbp); break; } /* * Check that the combined cluster * would make sense with regard to pages * and would not be too large */ if ((tbp->b_bcount != size) || ((bp->b_blkno + (dbsize * i)) != tbp->b_blkno) || ((tbp->b_npages + bp->b_npages) > (vp->v_mount->mnt_iosize_max / PAGE_SIZE))) { BUF_UNLOCK(tbp); break; } /* * Ok, it's passed all the tests, * so remove it from the free list * and mark it busy. We will use it. */ bremfree(tbp); tbp->b_flags &= ~B_DONE; } /* end of code for non-first buffers only */ /* * If the IO is via the VM then we do some * special VM hackery (yuck). Since the buffer's * block size may not be page-aligned it is possible * for a page to be shared between two buffers. We * have to get rid of the duplication when building * the cluster. 
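 * Duplicates are caught below by comparing each page against the last
 * page already appended to bp->b_pages.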
*/ if (tbp->b_flags & B_VMIO) { vm_page_t m; VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); if (i == 0) { vfs_drain_busy_pages(tbp); } else { /* if not first buffer */ for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; if (vm_page_xbusied(m)) { VM_OBJECT_WUNLOCK( tbp->b_object); bqrelse(tbp); goto finishcluster; } } } for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; vm_page_sbusy(m); vm_object_pip_add(m->object, 1); if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages - 1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } } VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); } bp->b_bcount += size; bp->b_bufsize += size; /* * If any of the clustered buffers have their * B_BARRIER flag set, transfer that request to * the cluster. */ bp->b_flags |= (tbp->b_flags & B_BARRIER); tbp->b_flags &= ~(B_DONE | B_BARRIER); tbp->b_flags |= B_ASYNC; tbp->b_ioflags &= ~BIO_ERROR; tbp->b_iocmd = BIO_WRITE; bundirty(tbp); reassignbuf(tbp); /* put on clean list */ bufobj_wref(tbp->b_bufobj); BUF_KERNPROC(tbp); buf_track(tbp, __func__); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); } finishcluster: if (buf_mapped(bp)) { pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); } if (bp->b_bufsize > bp->b_kvasize) panic( "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); totalwritten += bp->b_bufsize; bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bufsize; bawrite(bp); len -= i; } return totalwritten; } /* * Collect together all the buffers in a cluster. * Plus add one additional buffer. */ static struct cluster_save * cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags) { struct cluster_save *buflist; struct buf *bp; daddr_t lbn; - int i, len; + int i, j, len, error; len = vp->v_lastw - vp->v_cstart + 1; buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), M_SEGMENT, M_WAITOK); buflist->bs_nchildren = 0; buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) { - (void)bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, + error = bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, gbflags, &bp); + if (error != 0) { + /* + * If read fails, release collected buffers + * and return failure. + */ + for (j = 0; j < i; j++) + brelse(buflist->bs_children[j]); + free(buflist, M_SEGMENT); + return (NULL); + } buflist->bs_children[i] = bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } buflist->bs_children[i] = bp = last_bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); buflist->bs_nchildren = i + 1; return (buflist); } Index: projects/clang900-import/sys/kern/vfs_default.c =================================================================== --- projects/clang900-import/sys/kern/vfs_default.c (revision 352536) +++ projects/clang900-import/sys/kern/vfs_default.c (revision 352537) @@ -1,1404 +1,1408 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed * to Berkeley by John Heidemann of the UCLA Ficus project. * * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int vop_nolookup(struct vop_lookup_args *); static int vop_norename(struct vop_rename_args *); static int vop_nostrategy(struct vop_strategy_args *); static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td); static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td); #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) static int vop_stdis_text(struct vop_is_text_args *ap); static int vop_stdunset_text(struct vop_unset_text_args *ap); static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); static int vop_stdfdatasync(struct vop_fdatasync_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); static int vop_stdioctl(struct vop_ioctl_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't * implement a particular VOP. * * If there is no specific entry here, we will return EOPNOTSUPP. * * Note that every filesystem has to implement either vop_access * or vop_accessx; failing to do so will result in immediate crash * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), * which calls vop_stdaccess() etc. 
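 *
 * A filesystem normally reaches this table by chaining through the
 * vop_default pointer of its own vop_vector, e.g. (illustrative sketch
 * only; the "foofs" names are hypothetical):
 *
 *	struct vop_vector foofs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	foofs_lookup,
 *	};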
*/ struct vop_vector default_vnodeops = { .vop_default = NULL, .vop_bypass = VOP_EOPNOTSUPP, .vop_access = vop_stdaccess, .vop_accessx = vop_stdaccessx, .vop_advise = vop_stdadvise, .vop_advlock = vop_stdadvlock, .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, .vop_fdatasync = vop_stdfdatasync, .vop_getpages = vop_stdgetpages, .vop_getpages_async = vop_stdgetpages_async, .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, .vop_lock1 = vop_stdlock, .vop_lookup = vop_nolookup, .vop_open = VOP_NULL, .vop_pathconf = VOP_EINVAL, .vop_poll = vop_nopoll, .vop_putpages = vop_stdputpages, .vop_readlink = VOP_EINVAL, .vop_rename = vop_norename, .vop_revoke = VOP_PANIC, .vop_strategy = vop_nostrategy, .vop_unlock = vop_stdunlock, .vop_vptocnp = vop_stdvptocnp, .vop_vptofh = vop_stdvptofh, .vop_unp_bind = vop_stdunp_bind, .vop_unp_connect = vop_stdunp_connect, .vop_unp_detach = vop_stdunp_detach, .vop_is_text = vop_stdis_text, .vop_set_text = vop_stdset_text, .vop_unset_text = vop_stdunset_text, .vop_add_writecount = vop_stdadd_writecount, .vop_copy_file_range = vop_stdcopy_file_range, }; /* * Series of placeholder functions for various error returns for * VOPs. */ int vop_eopnotsupp(struct vop_generic_args *ap) { /* printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); */ return (EOPNOTSUPP); } int vop_ebadf(struct vop_generic_args *ap) { return (EBADF); } int vop_enotty(struct vop_generic_args *ap) { return (ENOTTY); } int vop_einval(struct vop_generic_args *ap) { return (EINVAL); } int vop_enoent(struct vop_generic_args *ap) { return (ENOENT); } int vop_null(struct vop_generic_args *ap) { return (0); } /* * Helper function to panic on some bad VOPs in some filesystems. */ int vop_panic(struct vop_generic_args *ap) { panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); } /* * vop_std and vop_no are default functions for use by * filesystems that need the "default reasonable" implementation for a * particular operation. * * The documentation for the operations they implement exists (if it exists) * in the VOP_(9) manpage (all uppercase). */ /* * Default vop for filesystems that do not support name lookup */ static int vop_nolookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * vop_norename: * * Handle unlock and reference counting for arguments of vop_rename * for filesystems that do not implement rename operation. */ static int vop_norename(struct vop_rename_args *ap) { vop_rename_fail(ap); return (EOPNOTSUPP); } /* * vop_nostrategy: * * Strategy routine for VFS devices that have none. * * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy * routine. Typically this is done for a BIO_READ strategy call. * Typically B_INVAL is assumed to already be clear prior to a write * and should not be cleared manually unless you just made the buffer * invalid. BIO_ERROR should be cleared either way. 
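 * (vop_nostrategy() below simply marks the buffer with BIO_ERROR and
 * b_error = EOPNOTSUPP and completes it via bufdone().)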
*/ static int vop_nostrategy (struct vop_strategy_args *ap) { printf("No strategy for buffer at %p\n", ap->a_bp); vn_printf(ap->a_vp, "vnode "); ap->a_bp->b_ioflags |= BIO_ERROR; ap->a_bp->b_error = EOPNOTSUPP; bufdone(ap->a_bp); return (EOPNOTSUPP); } static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td) { int error, reclen; struct uio uio; struct iovec iov; struct dirent *dp; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); if (*len == 0) { iov.iov_base = dirbuf; iov.iov_len = dirbuflen; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = *off; uio.uio_resid = dirbuflen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; *eofflag = 0; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, NULL, NULL); if (error) return (error); *off = uio.uio_offset; *cpos = dirbuf; *len = (dirbuflen - uio.uio_resid); if (*len == 0) return (ENOENT); } dp = (struct dirent *)(*cpos); reclen = dp->d_reclen; *dpp = dp; /* check for malformed directory.. */ if (reclen < DIRENT_MINSIZE) return (EINVAL); *cpos += reclen; *len -= reclen; return (0); } /* * Check if a named file exists in a given directory vnode. */ static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) { char *dirbuf, *cpos; int error, eofflag, dirbuflen, len, found; off_t off; struct dirent *dp; struct vattr va; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); found = 0; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error) return (found); dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); off = 0; len = 0; do { error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if (dp->d_type != DT_WHT && dp->d_fileno != 0 && strcmp(dp->d_name, dirname) == 0) { found = 1; goto out; } } while (len > 0 || !eofflag); out: free(dirbuf, M_TEMP); return (found); } int vop_stdaccess(struct vop_access_args *ap) { KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); } int vop_stdaccessx(struct vop_accessx_args *ap) { int error; accmode_t accmode = ap->a_accmode; error = vfs_unixify_accmode(&accmode); if (error != 0) return (error); if (accmode == 0) return (0); return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); } /* * Advisory record locking support */ int vop_stdadvlock(struct vop_advlock_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* * The NFSv4 server must avoid doing a vn_lock() here, since it * can deadlock the nfsd threads, due to a LOR. Fortunately * the NFSv4 server always uses SEEK_SET and this code is * only required for the SEEK_END case. 
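 * The file size is only needed to resolve SEEK_END relative offsets,
 * hence the VOP_GETATTR() under a shared vnode lock below; for other
 * l_whence values va_size is left at 0.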
*/ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp, 0); if (error) return (error); } else vattr.va_size = 0; return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* The size argument is only needed for SEEK_END. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp, 0); if (error) return (error); } else vattr.va_size = 0; return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) { struct vnode *vp; vp = ap->a_vp; lf_purgelocks(vp, &vp->v_lockf); return (0); } /* * vop_stdpathconf: * * Standard implementation of POSIX pathconf, to get information about limits * for a filesystem. * Override per filesystem for the case where the filesystem has smaller * limits. */ int vop_stdpathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_ASYNC_IO: *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_ACL_EXTENDED: case _PC_ACL_NFS4: case _PC_CAP_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Standard lock, unlock and islocked functions. */ int vop_stdlock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; struct mtx *ilk; ilk = VI_MTX(vp); return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags, &ilk->lock_object, ap->a_file, ap->a_line)); } /* See above. */ int vop_stdunlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp = ap->a_vp; struct mtx *ilk; ilk = VI_MTX(vp); return (lockmgr_unlock_fast_path(vp->v_vnlock, ap->a_flags, &ilk->lock_object)); } /* See above. */ int vop_stdislocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { return (lockstatus(ap->a_vp->v_vnlock)); } /* * Return true for select/poll. */ int vop_nopoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { return (poll_no_poll(ap->a_events)); } /* * Implement poll for local filesystems that support it. */ int vop_stdpoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { if (ap->a_events & ~POLLSTANDARD) return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } /* * Return our mount point, as we will take charge of the writes. */ int vop_stdgetwritemount(ap) struct vop_getwritemount_args /* { struct vnode *a_vp; struct mount **a_mpp; } */ *ap; { struct mount *mp; struct vnode *vp; /* * Note that having a reference does not prevent forced unmount from * setting ->v_mount to NULL after the lock gets released. This is of * no consequence for typical consumers (most notably vn_start_write) * since in this case the vnode is VI_DOOMED. Unmount might have * progressed far enough that its completion is only delayed by the * reference obtained here. The consumer only needs to concern itself * with releasing it. 
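 * If vfs_op_thread_enter() succeeds, the reference is taken with
 * vfs_mp_count_add_pcpu(); otherwise it is taken the slow way, under
 * MNT_ILOCK() with MNT_REF().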
*/ vp = ap->a_vp; mp = vp->v_mount; if (mp == NULL) { *(ap->a_mpp) = NULL; return (0); } if (vfs_op_thread_enter(mp)) { - if (mp == vp->v_mount) + if (mp == vp->v_mount) { vfs_mp_count_add_pcpu(mp, ref, 1); - else + vfs_op_thread_exit(mp); + } else { + vfs_op_thread_exit(mp); mp = NULL; - vfs_op_thread_exit(mp); + } } else { MNT_ILOCK(mp); - if (mp == vp->v_mount) + if (mp == vp->v_mount) { MNT_REF(mp); - else + MNT_IUNLOCK(mp); + } else { + MNT_IUNLOCK(mp); mp = NULL; - MNT_IUNLOCK(mp); + } } *(ap->a_mpp) = mp; return (0); } /* * If the file system doesn't implement VOP_BMAP, then return sensible defaults: * - Return the vnode's bufobj instead of any underlying device's bufobj * - Calculate the physical block number as if there were equal size * consecutive blocks, but * - Report no contiguous runs of blocks. */ int vop_stdbmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_bop != NULL) *ap->a_bop = &ap->a_vp->v_bufobj; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } int vop_stdfsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; int a_waitfor; struct thread *a_td; } */ *ap; { return (vn_fsync_buf(ap->a_vp, ap->a_waitfor)); } static int vop_stdfdatasync(struct vop_fdatasync_args *ap) { return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td)); } int vop_stdfdatasync_buf(struct vop_fdatasync_args *ap) { return (vn_fsync_buf(ap->a_vp, MNT_WAIT)); } /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ int vop_stdgetpages(ap) struct vop_getpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int *a_rbehind; int *a_rahead; } */ *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); } static int vop_stdgetpages_async(struct vop_getpages_async_args *ap) { int error; error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead); ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } int vop_stdkqfilter(struct vop_kqfilter_args *ap) { return vfs_kqfilter(ap); } /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). 
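 * (The default below just forwards to vnode_pager_generic_putpages().)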
*/ int vop_stdputpages(ap) struct vop_putpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_sync; int *a_rtvals; } */ *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } int vop_stdvptofh(struct vop_vptofh_args *ap) { return (EOPNOTSUPP); } int vop_stdvptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct ucred *cred = ap->a_cred; char *buf = ap->a_buf; int *buflen = ap->a_buflen; char *dirbuf, *cpos; int i, error, eofflag, dirbuflen, flags, locked, len, covered; off_t off; ino_t fileno; struct vattr va; struct nameidata nd; struct thread *td; struct dirent *dp; struct vnode *mvp; i = *buflen; error = 0; covered = 0; td = curthread; if (vp->v_type != VDIR) return (ENOENT); error = VOP_GETATTR(vp, &va, cred); if (error) return (error); VREF(vp); locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, "..", vp, td); flags = FREAD; error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL); if (error) { vn_lock(vp, locked | LK_RETRY); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); mvp = *dvp = nd.ni_vp; if (vp->v_mount != (*dvp)->v_mount && ((*dvp)->v_vflag & VV_ROOT) && ((*dvp)->v_mount->mnt_flag & MNT_UNION)) { *dvp = (*dvp)->v_mount->mnt_vnodecovered; VREF(mvp); VOP_UNLOCK(mvp, 0); vn_close(mvp, FREAD, cred, td); VREF(*dvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); covered = 1; } fileno = va.va_fileid; dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); if ((*dvp)->v_type != VDIR) { error = ENOENT; goto out; } off = 0; len = 0; do { /* call VOP_READDIR of parent */ error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if ((dp->d_type != DT_WHT) && (dp->d_fileno == fileno)) { if (covered) { VOP_UNLOCK(*dvp, 0); vn_lock(mvp, LK_SHARED | LK_RETRY); if (dirent_exists(mvp, dp->d_name, td)) { error = ENOENT; VOP_UNLOCK(mvp, 0); vn_lock(*dvp, LK_SHARED | LK_RETRY); goto out; } VOP_UNLOCK(mvp, 0); vn_lock(*dvp, LK_SHARED | LK_RETRY); } i -= dp->d_namlen; if (i < 0) { error = ENOMEM; goto out; } if (dp->d_namlen == 1 && dp->d_name[0] == '.') { error = ENOENT; } else { bcopy(dp->d_name, buf + i, dp->d_namlen); error = 0; } goto out; } } while (len > 0 || !eofflag); error = ENOENT; out: free(dirbuf, M_TEMP); if (!error) { *buflen = i; vref(*dvp); } if (covered) { vput(*dvp); vrele(mvp); } else { VOP_UNLOCK(mvp, 0); vn_close(mvp, FREAD, cred, td); } vn_lock(vp, locked | LK_RETRY); return (error); } int vop_stdallocate(struct vop_allocate_args *ap) { #ifdef __notyet__ struct statfs *sfs; off_t maxfilesize = 0; #endif struct iovec aiov; struct vattr vattr, *vap; struct uio auio; off_t fsize, len, cur, offset; uint8_t *buf; struct thread *td; struct vnode *vp; size_t iosize; int error; buf = NULL; error = 0; td = curthread; vap = &vattr; vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; error = VOP_GETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; fsize = vap->va_size; iosize = vap->va_blocksize; if (iosize == 0) iosize = BLKDEV_IOSIZE; if (iosize > MAXPHYS) iosize = MAXPHYS; buf = malloc(iosize, M_TEMP, M_WAITOK); #ifdef __notyet__ /* * Check if the filesystem sets f_maxfilesize; if not use * VOP_SETATTR to perform the check. 
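 * The VOP_SETATTR() fallback grows the file to offset + len and then
 * restores the original size, relying on the filesystem to reject a
 * size beyond its limit.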
*/ sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = VFS_STATFS(vp->v_mount, sfs, td); if (error == 0) maxfilesize = sfs->f_maxfilesize; free(sfs, M_STATFS); if (error != 0) goto out; if (maxfilesize) { if (offset > maxfilesize || len > maxfilesize || offset + len > maxfilesize) { error = EFBIG; goto out; } } else #endif if (offset + len > vap->va_size) { /* * Test offset + len against the filesystem's maxfilesize. */ VATTR_NULL(vap); vap->va_size = offset + len; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; } for (;;) { /* * Read and write back anything below the nominal file * size. There's currently no way outside the filesystem * to know whether this area is sparse or not. */ cur = iosize; if ((offset % iosize) != 0) cur -= (offset % iosize); if (cur > len) cur = len; if (offset < fsize) { aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; error = VOP_READ(vp, &auio, 0, td->td_ucred); if (error != 0) break; if (auio.uio_resid > 0) { bzero(buf + cur - auio.uio_resid, auio.uio_resid); } } else { bzero(buf, cur); } aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_td = td; error = VOP_WRITE(vp, &auio, 0, td->td_ucred); if (error != 0) break; len -= cur; offset += cur; if (len == 0) break; if (should_yield()) break; } out: *ap->a_len = len; *ap->a_offset = offset; free(buf, M_TEMP); return (error); } int vop_stdadvise(struct vop_advise_args *ap) { struct vnode *vp; struct bufobj *bo; daddr_t startn, endn; off_t bstart, bend, start, end; int bsize, error; vp = ap->a_vp; switch (ap->a_advice) { case POSIX_FADV_WILLNEED: /* * Do nothing for now. Filesystems should provide a * custom method which starts an asynchronous read of * the requested region. */ error = 0; break; case POSIX_FADV_DONTNEED: error = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); break; } /* * Round to block boundaries (and later possibly further to * page boundaries). Applications cannot reasonably be aware * of the boundaries, and the rounding must be to expand at * both extremities to cover enough. It still doesn't cover * read-ahead. For partial blocks, this gives unnecessary * discarding of buffers but is efficient enough since the * pages usually remain in VMIO for some time. */ bsize = vp->v_bufobj.bo_bsize; bstart = rounddown(ap->a_start, bsize); bend = roundup(ap->a_end, bsize); /* * Deactivate pages in the specified range from the backing VM * object. Pages that are resident in the buffer cache will * remain wired until their corresponding buffers are released * below. 
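 * The clean and dirty buffer lists are then walked over the same block
 * range with bnoreuselist() to release those buffers.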
*/ if (vp->v_object != NULL) { start = trunc_page(bstart); end = round_page(bend); VM_OBJECT_RLOCK(vp->v_object); vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), OFF_TO_IDX(end)); VM_OBJECT_RUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_RLOCK(bo); startn = bstart / bsize; endn = bend / bsize; error = bnoreuselist(&bo->bo_clean, bo, startn, endn); if (error == 0) error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); BO_RUNLOCK(bo); VOP_UNLOCK(vp, 0); break; default: error = EINVAL; break; } return (error); } int vop_stdunp_bind(struct vop_unp_bind_args *ap) { ap->a_vp->v_unpcb = ap->a_unpcb; return (0); } int vop_stdunp_connect(struct vop_unp_connect_args *ap) { *ap->a_unpcb = ap->a_vp->v_unpcb; return (0); } int vop_stdunp_detach(struct vop_unp_detach_args *ap) { ap->a_vp->v_unpcb = NULL; return (0); } static int vop_stdis_text(struct vop_is_text_args *ap) { return (ap->a_vp->v_writecount < 0); } int vop_stdset_text(struct vop_set_text_args *ap) { struct vnode *vp; struct mount *mp; int error; vp = ap->a_vp; VI_LOCK(vp); if (vp->v_writecount > 0) { error = ETXTBSY; } else { /* * If requested by fs, keep a use reference to the * vnode until the last text reference is released. */ mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 && vp->v_writecount == 0) { vp->v_iflag |= VI_TEXT_REF; vrefl(vp); } vp->v_writecount--; error = 0; } VI_UNLOCK(vp); return (error); } static int vop_stdunset_text(struct vop_unset_text_args *ap) { struct vnode *vp; int error; bool last; vp = ap->a_vp; last = false; VI_LOCK(vp); if (vp->v_writecount < 0) { if ((vp->v_iflag & VI_TEXT_REF) != 0 && vp->v_writecount == -1) { last = true; vp->v_iflag &= ~VI_TEXT_REF; } vp->v_writecount++; error = 0; } else { error = EINVAL; } VI_UNLOCK(vp); if (last) vunref(vp); return (error); } static int vop_stdadd_writecount(struct vop_add_writecount_args *ap) { struct vnode *vp; int error; vp = ap->a_vp; VI_LOCK_FLAGS(vp, MTX_DUPOK); if (vp->v_writecount < 0) { error = ETXTBSY; } else { VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp, ("neg writecount increment %d", ap->a_inc)); vp->v_writecount += ap->a_inc; error = 0; } VI_UNLOCK(vp); return (error); } int vop_stdneed_inactive(struct vop_need_inactive_args *ap) { return (1); } static int vop_stdioctl(struct vop_ioctl_args *ap) { struct vnode *vp; struct vattr va; off_t *offp; int error; switch (ap->a_command) { case FIOSEEKDATA: case FIOSEEKHOLE: vp = ap->a_vp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (EBADF); if (vp->v_type == VREG) error = VOP_GETATTR(vp, &va, ap->a_cred); else error = ENOTTY; if (error == 0) { offp = ap->a_data; if (*offp < 0 || *offp >= va.va_size) error = ENXIO; else if (ap->a_command == FIOSEEKHOLE) *offp = va.va_size; } VOP_UNLOCK(vp, 0); break; default: error = ENOTTY; break; } return (error); } /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. */ int vfs_stdroot (mp, flags, vpp) struct mount *mp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdstatfs (mp, sbp) struct mount *mp; struct statfs *sbp; { return (EOPNOTSUPP); } int vfs_stdquotactl (mp, cmds, uid, arg) struct mount *mp; int cmds; uid_t uid; void *arg; { return (EOPNOTSUPP); } int vfs_stdsync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *vp, *mvp; struct thread *td; int error, lockreq, allerror = 0; td = curthread; lockreq = LK_EXCLUSIVE | LK_INTERLOCK; if (waitfor != MNT_WAIT) lockreq |= LK_NOWAIT; /* * Force stale buffer cache information to be flushed. 
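 * Each vnode with dirty buffers is vget()'ed and VOP_FSYNC()'ed; if
 * vget() fails with ENOENT the vnode list changed underneath us and the
 * scan restarts.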
*/ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, lockreq, td)) != 0) { if (error == ENOENT) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; vput(vp); } return (allerror); } int vfs_stdnosync (mp, waitfor) struct mount *mp; int waitfor; { return (0); } static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap) { int error; error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred, ap->a_outcred, ap->a_fsizetd); return (error); } int vfs_stdvget (mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdfhtovp (mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdinit (vfsp) struct vfsconf *vfsp; { return (0); } int vfs_stduninit (vfsp) struct vfsconf *vfsp; { return(0); } int vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname) struct mount *mp; int cmd; struct vnode *filename_vp; int attrnamespace; const char *attrname; { if (filename_vp != NULL) VOP_UNLOCK(filename_vp, 0); return (EOPNOTSUPP); } int vfs_stdsysctl(mp, op, req) struct mount *mp; fsctlop_t op; struct sysctl_req *req; { return (EOPNOTSUPP); } static vop_bypass_t * bp_by_off(struct vop_vector *vop, struct vop_generic_args *a) { return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset)); } int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a) { vop_bypass_t *bp; int prev_stops, rc; for (; vop != NULL; vop = vop->vop_default) { bp = bp_by_off(vop, a); if (bp != NULL) break; /* * Bypass is not really supported. It is done for * fallback to unimplemented vops in the default * vector. */ bp = vop->vop_bypass; if (bp != NULL) break; } MPASS(bp != NULL); prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); rc = bp(a); sigallowstop(prev_stops); return (rc); } Index: projects/clang900-import/sys/kern/vfs_vnops.c =================================================================== --- projects/clang900-import/sys/kern/vfs_vnops.c (revision 352536) +++ projects/clang900-import/sys/kern/vfs_vnops.c (revision 352537) @@ -1,3128 +1,3132 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Copyright (c) 2012 Konstantin Belousov * Copyright (c) 2013, 2014 The FreeBSD Foundation * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif static fo_rdwr_t vn_read; static fo_rdwr_t vn_write; static fo_rdwr_t vn_io_fault; static fo_truncate_t vn_truncate; static fo_ioctl_t vn_ioctl; static fo_poll_t vn_poll; static fo_kqfilter_t vn_kqfilter; static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; struct fileops vnops = { .fo_read = vn_io_fault, .fo_write = vn_io_fault, .fo_truncate = vn_truncate, .fo_ioctl = vn_ioctl, .fo_poll = vn_poll, .fo_kqfilter = vn_kqfilter, .fo_stat = vn_statfile, .fo_close = vn_closefile, .fo_chmod = vn_chmod, .fo_chown = vn_chown, .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; static const int io_hold_cnt = 16; static int vn_io_fault_enable = 1; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RW, &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance"); static int vn_io_fault_prefault = 0; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_prefault, CTLFLAG_RW, &vn_io_fault_prefault, 0, "Enable vn_io_fault prefaulting"); static u_long vn_io_faults_cnt; SYSCTL_ULONG(_debug, OID_AUTO, vn_io_faults, CTLFLAG_RD, &vn_io_faults_cnt, 0, "Count of vn_io_fault lock avoidance triggers"); /* * Returns true if vn_io_fault mode of handling the i/o request should * be used. */ static bool do_vn_io_fault(struct vnode *vp, struct uio *uio) { struct mount *mp; return (uio->uio_segflg == UIO_USERSPACE && vp->v_type == VREG && (mp = vp->v_mount) != NULL && (mp->mnt_kern_flag & MNTK_NO_IOPF) != 0 && vn_io_fault_enable); } /* * Structure used to pass arguments to vn_io_fault1(), to do either * file- or vnode-based I/O calls. 
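 * The kind field selects the active union member: VN_IO_FAULT_FOP uses
 * fop_args (a file and its fo_rdwr_t), VN_IO_FAULT_VOP uses vop_args
 * (a vnode).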
*/ struct vn_io_fault_args { enum { VN_IO_FAULT_FOP, VN_IO_FAULT_VOP } kind; struct ucred *cred; int flags; union { struct fop_args_tag { struct file *fp; fo_rdwr_t *doio; } fop_args; struct vop_args_tag { struct vnode *vp; } vop_args; } args; }; static int vn_io_fault1(struct vnode *vp, struct uio *uio, struct vn_io_fault_args *args, struct thread *td); int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp) { struct thread *td = ndp->ni_cnd.cn_thread; return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp)); } /* * Common code for vnode open operations via a name lookup. * Lookup the vnode and invoke VOP_CREATE if needed. * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. * * Note that this does NOT free nameidata for the successful case, * due to the NDINIT being done elsewhere. */ int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, struct ucred *cred, struct file *fp) { struct vnode *vp; struct mount *mp; struct thread *td = ndp->ni_cnd.cn_thread; struct vattr vat; struct vattr *vap = &vat; int fmode, error; restart: fmode = *flagp; if ((fmode & (O_CREAT | O_EXCL | O_DIRECTORY)) == (O_CREAT | O_EXCL | O_DIRECTORY)) return (EINVAL); else if ((fmode & (O_CREAT | O_DIRECTORY)) == O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; /* * Set NOCACHE to avoid flushing the cache when * rolling in many files at once. */ ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF | NOCACHE; if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; if ((fmode & O_BENEATH) != 0) ndp->ni_cnd.cn_flags |= BENEATH; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) ndp->ni_cnd.cn_flags |= NOCAPCHECK; bwillwrite(); if ((error = namei(ndp)) != 0) return (error); if (ndp->ni_vp == NULL) { VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; if (fmode & O_EXCL) vap->va_vaflags |= VA_EXCLUSIVE; if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(ndp, NDF_ONLY_PNBUF); vput(ndp->ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0) ndp->ni_cnd.cn_flags |= MAKEENTRY; #ifdef MAC error = mac_vnode_check_create(cred, ndp->ni_dvp, &ndp->ni_cnd, vap); if (error == 0) #endif error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, vap); vput(ndp->ni_dvp); vn_finished_write(mp); if (error) { NDFREE(ndp, NDF_ONLY_PNBUF); return (error); } fmode &= ~O_TRUNC; vp = ndp->ni_vp; } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); ndp->ni_dvp = NULL; vp = ndp->ni_vp; if (fmode & O_EXCL) { error = EEXIST; goto bad; } + if (vp->v_type == VDIR) { + error = EISDIR; + goto bad; + } fmode &= ~O_CREAT; } } else { ndp->ni_cnd.cn_nameiop = LOOKUP; ndp->ni_cnd.cn_flags = ISOPEN | ((fmode & O_NOFOLLOW) ? 
NOFOLLOW : FOLLOW) | LOCKLEAF; if (!(fmode & FWRITE)) ndp->ni_cnd.cn_flags |= LOCKSHARED; if ((fmode & O_BENEATH) != 0) ndp->ni_cnd.cn_flags |= BENEATH; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) ndp->ni_cnd.cn_flags |= NOCAPCHECK; if ((error = namei(ndp)) != 0) return (error); vp = ndp->ni_vp; } error = vn_open_vnode(vp, fmode, cred, td, fp); if (error) goto bad; *flagp = fmode; return (0); bad: NDFREE(ndp, NDF_ONLY_PNBUF); vput(vp); *flagp = fmode; ndp->ni_vp = NULL; return (error); } static int vn_open_vnode_advlock(struct vnode *vp, int fmode, struct file *fp) { struct flock lf; int error, lock_flags, type; ASSERT_VOP_LOCKED(vp, "vn_open_vnode_advlock"); if ((fmode & (O_EXLOCK | O_SHLOCK)) == 0) return (0); KASSERT(fp != NULL, ("open with flock requires fp")); if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE) return (EOPNOTSUPP); lock_flags = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = (fmode & O_EXLOCK) != 0 ? F_WRLCK : F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); if (error == 0) fp->f_flag |= FHASLOCK; vn_lock(vp, lock_flags | LK_RETRY); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) error = ENOENT; return (error); } /* * Common code for vnode open operations once a vnode is located. * Check permissions, and call the VOP_OPEN routine. */ int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, struct thread *td, struct file *fp) { accmode_t accmode; int error; if (vp->v_type == VLNK) return (EMLINK); if (vp->v_type == VSOCK) return (EOPNOTSUPP); if (vp->v_type != VDIR && fmode & O_DIRECTORY) return (ENOTDIR); accmode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) return (EISDIR); accmode |= VWRITE; } if (fmode & FREAD) accmode |= VREAD; if (fmode & FEXEC) accmode |= VEXEC; if ((fmode & O_APPEND) && (fmode & FWRITE)) accmode |= VAPPEND; #ifdef MAC if (fmode & O_CREAT) accmode |= VCREAT; if (fmode & O_VERIFY) accmode |= VVERIFY; error = mac_vnode_check_open(cred, vp, accmode); if (error) return (error); accmode &= ~(VCREAT | VVERIFY); #endif if ((fmode & O_CREAT) == 0 && accmode != 0) { error = VOP_ACCESS(vp, accmode, cred, td); if (error != 0) return (error); } if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vn_lock(vp, LK_UPGRADE | LK_RETRY); error = VOP_OPEN(vp, fmode, cred, td, fp); if (error != 0) return (error); error = vn_open_vnode_advlock(vp, fmode, fp); if (error == 0 && (fmode & FWRITE) != 0) { error = VOP_ADD_WRITECOUNT(vp, 1); if (error == 0) { CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp, vp->v_writecount); } } /* * Error from advlock or VOP_ADD_WRITECOUNT() still requires * calling VOP_CLOSE() to pair with earlier VOP_OPEN(). * Arrange for that by having fdrop() to use vn_closefile(). */ if (error != 0) { fp->f_flag |= FOPENFAILED; fp->f_vnode = vp; if (fp->f_ops == &badfileops) { fp->f_type = DTYPE_VNODE; fp->f_ops = &vnops; } vref(vp); } ASSERT_VOP_LOCKED(vp, "vn_open_vnode"); return (error); } /* * Check for write permissions on the specified vnode. * Prototype text segments cannot be written. * It is racy. */ int vn_writechk(struct vnode *vp) { ASSERT_VOP_LOCKED(vp, "vn_writechk"); /* * If there's shared text associated with * the vnode, try to free it up once. If * we fail, we can't allow writing. 
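 * (As implemented below, this reduces to returning ETXTBSY whenever the
 * vnode is an active text image.)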
*/ if (VOP_IS_TEXT(vp)) return (ETXTBSY); return (0); } /* * Vnode close call */ static int vn_close1(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td, bool keep_ref) { struct mount *mp; int error, lock_flags; if (vp->v_type != VFIFO && (flags & FWRITE) == 0 && MNT_EXTENDED_SHARED(vp->v_mount)) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, lock_flags | LK_RETRY); AUDIT_ARG_VNODE1(vp); if ((flags & (FWRITE | FOPENFAILED)) == FWRITE) { VOP_ADD_WRITECOUNT_CHECKED(vp, -1); CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, vp, vp->v_writecount); } error = VOP_CLOSE(vp, flags, file_cred, td); if (keep_ref) VOP_UNLOCK(vp, 0); else vput(vp); vn_finished_write(mp); return (error); } int vn_close(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td) { return (vn_close1(vp, flags, file_cred, td, false)); } /* * Heuristic to detect sequential operation. */ static int sequential_heuristic(struct uio *uio, struct file *fp) { ASSERT_VOP_LOCKED(fp->f_vnode, __func__); if (fp->f_flag & FRDAHEAD) return (fp->f_seqcount << IO_SEQSHIFT); /* * Offset 0 is handled specially. open() sets f_seqcount to 1 so * that the first I/O is normally considered to be slightly * sequential. Seeking to offset 0 doesn't change sequentiality * unless previous seeks have reduced f_seqcount to 0, in which * case offset 0 is not special. */ if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || uio->uio_offset == fp->f_nextoff) { /* * f_seqcount is in units of fixed-size blocks so that it * depends mainly on the amount of sequential I/O and not * much on the number of sequential I/O's. The fixed size * of 16384 is hard-coded here since it is (not quite) just * a magic size that works well here. This size is more * closely related to the best I/O size for real disks than * to any block size used by software. */ if (uio->uio_resid >= IO_SEQMAX * 16384) fp->f_seqcount = IO_SEQMAX; else { fp->f_seqcount += howmany(uio->uio_resid, 16384); if (fp->f_seqcount > IO_SEQMAX) fp->f_seqcount = IO_SEQMAX; } return (fp->f_seqcount << IO_SEQSHIFT); } /* Not sequential. Quickly draw-down sequentiality. */ if (fp->f_seqcount > 1) fp->f_seqcount = 1; else fp->f_seqcount = 0; return (0); } /* * Package up an I/O request on a vnode into a uio and do it. 
*/ int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid, struct thread *td) { struct uio auio; struct iovec aiov; struct mount *mp; struct ucred *cred; void *rl_cookie; struct vn_io_fault_args args; int error, lock_flags; if (offset < 0 && vp->v_type != VCHR) return (EINVAL); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; aiov.iov_len = len; auio.uio_resid = len; auio.uio_offset = offset; auio.uio_segflg = segflg; auio.uio_rw = rw; auio.uio_td = td; error = 0; if ((ioflg & IO_NODELOCKED) == 0) { if ((ioflg & IO_RANGELOCKED) == 0) { if (rw == UIO_READ) { rl_cookie = vn_rangelock_rlock(vp, offset, offset + len); } else { rl_cookie = vn_rangelock_wlock(vp, offset, offset + len); } } else rl_cookie = NULL; mp = NULL; if (rw == UIO_WRITE) { if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto out; if (MNT_SHARED_WRITES(mp) || ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; } else lock_flags = LK_SHARED; vn_lock(vp, lock_flags | LK_RETRY); } else rl_cookie = NULL; ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); #ifdef MAC if ((ioflg & IO_NOMACCHECK) == 0) { if (rw == UIO_READ) error = mac_vnode_check_read(active_cred, file_cred, vp); else error = mac_vnode_check_write(active_cred, file_cred, vp); } #endif if (error == 0) { if (file_cred != NULL) cred = file_cred; else cred = active_cred; if (do_vn_io_fault(vp, &auio)) { args.kind = VN_IO_FAULT_VOP; args.cred = cred; args.flags = ioflg; args.args.vop_args.vp = vp; error = vn_io_fault1(vp, &auio, &args, td); } else if (rw == UIO_READ) { error = VOP_READ(vp, &auio, ioflg, cred); } else /* if (rw == UIO_WRITE) */ { error = VOP_WRITE(vp, &auio, ioflg, cred); } } if (aresid) *aresid = auio.uio_resid; else if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) { VOP_UNLOCK(vp, 0); if (mp != NULL) vn_finished_write(mp); } out: if (rl_cookie != NULL) vn_rangelock_unlock(vp, rl_cookie); return (error); } /* * Package up an I/O request on a vnode into a uio and do it. The I/O * request is split up into smaller chunks and we try to avoid saturating * the buffer cache while potentially holding a vnode locked, so we * check bwillwrite() before calling vn_rdwr(). We also call kern_yield() * to give other processes a chance to lock the vnode (either other processes * core'ing the same binary, or unrelated processes scanning the directory). */ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td) { int error = 0; ssize_t iaresid; do { int chunk; /* * Force `offset' to a multiple of MAXBSIZE except possibly * for the first chunk, so that filesystems only need to * write full blocks except possibly for the first and last * chunks. 
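The chunking arithmetic that follows forces every chunk after the first to start on a MAXBSIZE boundary, so only the first and last chunks can be partial blocks. A standalone sketch of the same calculation, assuming a 64 KiB MAXBSIZE purely for illustration (the real value comes from <sys/param.h>):

/* Illustrative only: chunk layout used by vn_rdwr_inchunks(). */
#include <stdio.h>

#define MAXBSIZE	65536UL		/* assumption for the example */

int
main(void)
{
	unsigned long offset = 1000;	/* hypothetical starting offset */
	unsigned long len = 200000;	/* hypothetical transfer length */

	while (len > 0) {
		unsigned long chunk = MAXBSIZE - offset % MAXBSIZE;

		if (chunk > len)
			chunk = len;
		printf("write %7lu bytes at offset %7lu\n", chunk, offset);
		offset += chunk;
		len -= chunk;
	}
	return (0);
}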
*/ chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE; if (chunk > len) chunk = len; if (rw != UIO_READ && vp->v_type == VREG) bwillwrite(); iaresid = 0; error = vn_rdwr(rw, vp, base, chunk, offset, segflg, ioflg, active_cred, file_cred, &iaresid, td); len -= chunk; /* aresid calc already includes length */ if (error) break; offset += chunk; base = (char *)base + chunk; kern_yield(PRI_USER); } while (len); if (aresid) *aresid = len + iaresid; return (error); } off_t foffset_lock(struct file *fp, int flags) { struct mtx *mtxp; off_t res; KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed")); #if OFF_MAX <= LONG_MAX /* * Caller only wants the current f_offset value. Assume that * the long and shorter integer types reads are atomic. */ if ((flags & FOF_NOLOCK) != 0) return (fp->f_offset); #endif /* * According to McKusick the vn lock was protecting f_offset here. * It is now protected by the FOFFSET_LOCKED flag. */ mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if ((flags & FOF_NOLOCK) == 0) { while (fp->f_vnread_flags & FOFFSET_LOCKED) { fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; msleep(&fp->f_vnread_flags, mtxp, PUSER -1, "vofflock", 0); } fp->f_vnread_flags |= FOFFSET_LOCKED; } res = fp->f_offset; mtx_unlock(mtxp); return (res); } void foffset_unlock(struct file *fp, off_t val, int flags) { struct mtx *mtxp; KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed")); #if OFF_MAX <= LONG_MAX if ((flags & FOF_NOLOCK) != 0) { if ((flags & FOF_NOUPDATE) == 0) fp->f_offset = val; if ((flags & FOF_NEXTOFF) != 0) fp->f_nextoff = val; return; } #endif mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if ((flags & FOF_NOUPDATE) == 0) fp->f_offset = val; if ((flags & FOF_NEXTOFF) != 0) fp->f_nextoff = val; if ((flags & FOF_NOLOCK) == 0) { KASSERT((fp->f_vnread_flags & FOFFSET_LOCKED) != 0, ("Lost FOFFSET_LOCKED")); if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING) wakeup(&fp->f_vnread_flags); fp->f_vnread_flags = 0; } mtx_unlock(mtxp); } void foffset_lock_uio(struct file *fp, struct uio *uio, int flags) { if ((flags & FOF_OFFSET) == 0) uio->uio_offset = foffset_lock(fp, flags); } void foffset_unlock_uio(struct file *fp, struct uio *uio, int flags) { if ((flags & FOF_OFFSET) == 0) foffset_unlock(fp, uio->uio_offset, flags); } static int get_advice(struct file *fp, struct uio *uio) { struct mtx *mtxp; int ret; ret = POSIX_FADV_NORMAL; if (fp->f_advice == NULL || fp->f_vnode->v_type != VREG) return (ret); mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if (fp->f_advice != NULL && uio->uio_offset >= fp->f_advice->fa_start && uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end) ret = fp->f_advice->fa_advice; mtx_unlock(mtxp); return (ret); } /* * File table vnode read routine. */ static int vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct vnode *vp; off_t orig_offset; int error, ioflag; int advice; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; ioflag = 0; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; advice = get_advice(fp, uio); vn_lock(vp, LK_SHARED | LK_RETRY); switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_NOREUSE: ioflag |= sequential_heuristic(uio, fp); break; case POSIX_FADV_RANDOM: /* Disable read-ahead for random I/O. 
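The advice consulted by vn_read() here is whatever posix_fadvise(2) recorded for the byte range on the struct file: POSIX_FADV_RANDOM suppresses the sequential read-ahead heuristic, while POSIX_FADV_NOREUSE additionally triggers the VOP_ADVISE(POSIX_FADV_DONTNEED) call after the read completes. A hedged userland sketch, illustrative only and not part of this change; the file name is hypothetical:

/* Illustrative only: how the advice checked above is set from userland. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char buf[4096];
	int error;
	int fd = open("/var/log/messages", O_RDONLY);	/* hypothetical file */

	if (fd < 0)
		return (1);
	/* Record advice for the whole file; vn_read() then skips the
	 * read-ahead heuristic for reads that fall inside this range. */
	error = posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
	if (error != 0)
		fprintf(stderr, "posix_fadvise: %s\n", strerror(error));
	(void)read(fd, buf, sizeof(buf));
	close(fd);
	return (0);
}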
*/ break; } orig_offset = uio->uio_offset; #ifdef MAC error = mac_vnode_check_read(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_READ(vp, uio, ioflag, fp->f_cred); fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); if (error == 0 && advice == POSIX_FADV_NOREUSE && orig_offset != uio->uio_offset) /* * Use POSIX_FADV_DONTNEED to flush pages and buffers * for the backing file after a POSIX_FADV_NOREUSE * read(2). */ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1, POSIX_FADV_DONTNEED); return (error); } /* * File table vnode write routine. */ static int vn_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct vnode *vp; struct mount *mp; off_t orig_offset; int error, ioflag, lock_flags; int advice; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; if (vp->v_type == VREG) bwillwrite(); ioflag = IO_UNIT; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; if ((fp->f_flag & O_FSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; mp = NULL; if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto unlock; advice = get_advice(fp, uio); if (MNT_SHARED_WRITES(mp) || (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))) { lock_flags = LK_SHARED; } else { lock_flags = LK_EXCLUSIVE; } vn_lock(vp, lock_flags | LK_RETRY); switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_NOREUSE: ioflag |= sequential_heuristic(uio, fp); break; case POSIX_FADV_RANDOM: /* XXX: Is this correct? */ break; } orig_offset = uio->uio_offset; #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_WRITE(vp, uio, ioflag, fp->f_cred); fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); if (vp->v_type != VCHR) vn_finished_write(mp); if (error == 0 && advice == POSIX_FADV_NOREUSE && orig_offset != uio->uio_offset) /* * Use POSIX_FADV_DONTNEED to flush pages and buffers * for the backing file after a POSIX_FADV_NOREUSE * write(2). */ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1, POSIX_FADV_DONTNEED); unlock: return (error); } /* * The vn_io_fault() is a wrapper around vn_read() and vn_write() to * prevent the following deadlock: * * Assume that the thread A reads from the vnode vp1 into userspace * buffer buf1 backed by the pages of vnode vp2. If a page in buf1 is * currently not resident, then system ends up with the call chain * vn_read() -> VOP_READ(vp1) -> uiomove() -> [Page Fault] -> * vm_fault(buf1) -> vnode_pager_getpages(vp2) -> VOP_GETPAGES(vp2) * which establishes lock order vp1->vn_lock, then vp2->vn_lock. * If, at the same time, thread B reads from vnode vp2 into buffer buf2 * backed by the pages of vnode vp1, and some page in buf2 is not * resident, we get a reversed order vp2->vn_lock, then vp1->vn_lock. * * To prevent the lock order reversal and deadlock, vn_io_fault() does * not allow page faults to happen during VOP_READ() or VOP_WRITE(). * Instead, it first tries to do the whole range i/o with pagefaults * disabled. If all pages in the i/o buffer are resident and mapped, * VOP will succeed (ignoring the genuine filesystem errors). 
* Otherwise, we get back EFAULT, and vn_io_fault() falls back to do * i/o in chunks, with all pages in the chunk prefaulted and held * using vm_fault_quick_hold_pages(). * * Filesystems using this deadlock avoidance scheme should use the * array of the held pages from uio, saved in the curthread->td_ma, * instead of doing uiomove(). A helper function * vn_io_fault_uiomove() converts uiomove request into * uiomove_fromphys() over td_ma array. * * Since vnode locks do not cover the whole i/o anymore, rangelocks * make the current i/o request atomic with respect to other i/os and * truncations. */ /* * Decode vn_io_fault_args and perform the corresponding i/o. */ static int vn_io_fault_doio(struct vn_io_fault_args *args, struct uio *uio, struct thread *td) { int error, save; error = 0; save = vm_fault_disable_pagefaults(); switch (args->kind) { case VN_IO_FAULT_FOP: error = (args->args.fop_args.doio)(args->args.fop_args.fp, uio, args->cred, args->flags, td); break; case VN_IO_FAULT_VOP: if (uio->uio_rw == UIO_READ) { error = VOP_READ(args->args.vop_args.vp, uio, args->flags, args->cred); } else if (uio->uio_rw == UIO_WRITE) { error = VOP_WRITE(args->args.vop_args.vp, uio, args->flags, args->cred); } break; default: panic("vn_io_fault_doio: unknown kind of io %d %d", args->kind, uio->uio_rw); } vm_fault_enable_pagefaults(save); return (error); } static int vn_io_fault_touch(char *base, const struct uio *uio) { int r; r = fubyte(base); if (r == -1 || (uio->uio_rw == UIO_READ && subyte(base, r) == -1)) return (EFAULT); return (0); } static int vn_io_fault_prefault_user(const struct uio *uio) { char *base; const struct iovec *iov; size_t len; ssize_t resid; int error, i; KASSERT(uio->uio_segflg == UIO_USERSPACE, ("vn_io_fault_prefault userspace")); error = i = 0; iov = uio->uio_iov; resid = uio->uio_resid; base = iov->iov_base; len = iov->iov_len; while (resid > 0) { error = vn_io_fault_touch(base, uio); if (error != 0) break; if (len < PAGE_SIZE) { if (len != 0) { error = vn_io_fault_touch(base + len - 1, uio); if (error != 0) break; resid -= len; } if (++i >= uio->uio_iovcnt) break; iov = uio->uio_iov + i; base = iov->iov_base; len = iov->iov_len; } else { len -= PAGE_SIZE; base += PAGE_SIZE; resid -= PAGE_SIZE; } } return (error); } /* * Common code for vn_io_fault(), agnostic to the kind of i/o request. * Uses vn_io_fault_doio() to make the call to an actual i/o function. * Used from vn_rdwr() and vn_io_fault(), which encode the i/o request * into args and call vn_io_fault1() to handle faults during the user * mode buffer accesses. */ static int vn_io_fault1(struct vnode *vp, struct uio *uio, struct vn_io_fault_args *args, struct thread *td) { vm_page_t ma[io_hold_cnt + 2]; struct uio *uio_clone, short_uio; struct iovec short_iovec[1]; vm_page_t *prev_td_ma; vm_prot_t prot; vm_offset_t addr, end; size_t len, resid; ssize_t adv; int error, cnt, saveheld, prev_td_ma_cnt; if (vn_io_fault_prefault) { error = vn_io_fault_prefault_user(uio); if (error != 0) return (error); /* Or ignore ? */ } prot = uio->uio_rw == UIO_READ ? VM_PROT_WRITE : VM_PROT_READ; /* * The UFS follows IO_UNIT directive and replays back both * uio_offset and uio_resid if an error is encountered during the * operation. But, since the iovec may be already advanced, * uio is still in an inconsistent state. * * Cache a copy of the original uio, which is advanced to the redo * point using UIO_NOCOPY below. 
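One concrete way to produce the access pattern described above from userland is to read(2) into a buffer that is itself a not-yet-resident file mapping, so the copyout inside VOP_READ() faults on a second vnode. A minimal single-threaded sketch, illustrative only and not part of this change; the file names are hypothetical and it completes normally precisely because vn_io_fault() pre-holds the destination pages:

/* Illustrative only: a read(2) whose destination is an unfaulted mapping
 * of a second file, the situation vn_io_fault() is built to handle. */
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Hypothetical file names; both files are assumed to be >= 1 MiB. */
	int fd1 = open("/tmp/file1", O_RDONLY);
	int fd2 = open("/tmp/file2", O_RDWR);
	size_t len = 1024 * 1024;
	char *buf;
	ssize_t n;

	if (fd1 < 0 || fd2 < 0)
		return (1);
	/* buf is backed by the pages of fd2's vnode and is not resident yet. */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd2, 0);
	if (buf == MAP_FAILED)
		return (1);
	/* The copyout inside VOP_READ() of fd1's vnode faults on fd2's pages;
	 * vn_io_fault() holds them up front instead of faulting under the lock. */
	n = read(fd1, buf, len);
	printf("read %zd bytes\n", n);
	return (0);
}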
*/ uio_clone = cloneuio(uio); resid = uio->uio_resid; short_uio.uio_segflg = UIO_USERSPACE; short_uio.uio_rw = uio->uio_rw; short_uio.uio_td = uio->uio_td; error = vn_io_fault_doio(args, uio, td); if (error != EFAULT) goto out; atomic_add_long(&vn_io_faults_cnt, 1); uio_clone->uio_segflg = UIO_NOCOPY; uiomove(NULL, resid - uio->uio_resid, uio_clone); uio_clone->uio_segflg = uio->uio_segflg; saveheld = curthread_pflags_set(TDP_UIOHELD); prev_td_ma = td->td_ma; prev_td_ma_cnt = td->td_ma_cnt; while (uio_clone->uio_resid != 0) { len = uio_clone->uio_iov->iov_len; if (len == 0) { KASSERT(uio_clone->uio_iovcnt >= 1, ("iovcnt underflow")); uio_clone->uio_iov++; uio_clone->uio_iovcnt--; continue; } if (len > io_hold_cnt * PAGE_SIZE) len = io_hold_cnt * PAGE_SIZE; addr = (uintptr_t)uio_clone->uio_iov->iov_base; end = round_page(addr + len); if (end < addr) { error = EFAULT; break; } cnt = atop(end - trunc_page(addr)); /* * A perfectly misaligned address and length could cause * both the start and the end of the chunk to use partial * page. +2 accounts for such a situation. */ cnt = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map, addr, len, prot, ma, io_hold_cnt + 2); if (cnt == -1) { error = EFAULT; break; } short_uio.uio_iov = &short_iovec[0]; short_iovec[0].iov_base = (void *)addr; short_uio.uio_iovcnt = 1; short_uio.uio_resid = short_iovec[0].iov_len = len; short_uio.uio_offset = uio_clone->uio_offset; td->td_ma = ma; td->td_ma_cnt = cnt; error = vn_io_fault_doio(args, &short_uio, td); vm_page_unhold_pages(ma, cnt); adv = len - short_uio.uio_resid; uio_clone->uio_iov->iov_base = (char *)uio_clone->uio_iov->iov_base + adv; uio_clone->uio_iov->iov_len -= adv; uio_clone->uio_resid -= adv; uio_clone->uio_offset += adv; uio->uio_resid -= adv; uio->uio_offset += adv; if (error != 0 || adv == 0) break; } td->td_ma = prev_td_ma; td->td_ma_cnt = prev_td_ma_cnt; curthread_pflags_restore(saveheld); out: free(uio_clone, M_IOV); return (error); } static int vn_io_fault(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { fo_rdwr_t *doio; struct vnode *vp; void *rl_cookie; struct vn_io_fault_args args; int error; doio = uio->uio_rw == UIO_READ ? vn_read : vn_write; vp = fp->f_vnode; foffset_lock_uio(fp, uio, flags); if (do_vn_io_fault(vp, uio)) { args.kind = VN_IO_FAULT_FOP; args.args.fop_args.fp = fp; args.args.fop_args.doio = doio; args.cred = active_cred; args.flags = flags | FOF_OFFSET; if (uio->uio_rw == UIO_READ) { rl_cookie = vn_rangelock_rlock(vp, uio->uio_offset, uio->uio_offset + uio->uio_resid); } else if ((fp->f_flag & O_APPEND) != 0 || (flags & FOF_OFFSET) == 0) { /* For appenders, punt and lock the whole range. */ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); } else { rl_cookie = vn_rangelock_wlock(vp, uio->uio_offset, uio->uio_offset + uio->uio_resid); } error = vn_io_fault1(vp, uio, &args, td); vn_rangelock_unlock(vp, rl_cookie); } else { error = doio(fp, uio, active_cred, flags | FOF_OFFSET, td); } foffset_unlock_uio(fp, uio, flags); return (error); } /* * Helper function to perform the requested uiomove operation using * the held pages for io->uio_iov[0].iov_base buffer instead of * copyin/copyout. Access to the pages with uiomove_fromphys() * instead of iov_base prevents page faults that could occur due to * pmap_collect() invalidating the mapping created by * vm_fault_quick_hold_pages(), or pageout daemon, page laundry or * object cleanup revoking the write access from page mappings. 
* * Filesystems specified MNTK_NO_IOPF shall use vn_io_fault_uiomove() * instead of plain uiomove(). */ int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio) { struct uio transp_uio; struct iovec transp_iov[1]; struct thread *td; size_t adv; int error, pgadv; td = curthread; if ((td->td_pflags & TDP_UIOHELD) == 0 || uio->uio_segflg != UIO_USERSPACE) return (uiomove(data, xfersize, uio)); KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt)); transp_iov[0].iov_base = data; transp_uio.uio_iov = &transp_iov[0]; transp_uio.uio_iovcnt = 1; if (xfersize > uio->uio_resid) xfersize = uio->uio_resid; transp_uio.uio_resid = transp_iov[0].iov_len = xfersize; transp_uio.uio_offset = 0; transp_uio.uio_segflg = UIO_SYSSPACE; /* * Since transp_iov points to data, and td_ma page array * corresponds to original uio->uio_iov, we need to invert the * direction of the i/o operation as passed to * uiomove_fromphys(). */ switch (uio->uio_rw) { case UIO_WRITE: transp_uio.uio_rw = UIO_READ; break; case UIO_READ: transp_uio.uio_rw = UIO_WRITE; break; } transp_uio.uio_td = uio->uio_td; error = uiomove_fromphys(td->td_ma, ((vm_offset_t)uio->uio_iov->iov_base) & PAGE_MASK, xfersize, &transp_uio); adv = xfersize - transp_uio.uio_resid; pgadv = (((vm_offset_t)uio->uio_iov->iov_base + adv) >> PAGE_SHIFT) - (((vm_offset_t)uio->uio_iov->iov_base) >> PAGE_SHIFT); td->td_ma += pgadv; KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt, pgadv)); td->td_ma_cnt -= pgadv; uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + adv; uio->uio_iov->iov_len -= adv; uio->uio_resid -= adv; uio->uio_offset += adv; return (error); } int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, struct uio *uio) { struct thread *td; vm_offset_t iov_base; int cnt, pgadv; td = curthread; if ((td->td_pflags & TDP_UIOHELD) == 0 || uio->uio_segflg != UIO_USERSPACE) return (uiomove_fromphys(ma, offset, xfersize, uio)); KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt)); cnt = xfersize > uio->uio_resid ? uio->uio_resid : xfersize; iov_base = (vm_offset_t)uio->uio_iov->iov_base; switch (uio->uio_rw) { case UIO_WRITE: pmap_copy_pages(td->td_ma, iov_base & PAGE_MASK, ma, offset, cnt); break; case UIO_READ: pmap_copy_pages(ma, offset, td->td_ma, iov_base & PAGE_MASK, cnt); break; } pgadv = ((iov_base + cnt) >> PAGE_SHIFT) - (iov_base >> PAGE_SHIFT); td->td_ma += pgadv; KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt, pgadv)); td->td_ma_cnt -= pgadv; uio->uio_iov->iov_base = (char *)(iov_base + cnt); uio->uio_iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; return (0); } /* * File table truncate routine. */ static int vn_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { struct mount *mp; struct vnode *vp; void *rl_cookie; int error; vp = fp->f_vnode; /* * Lock the whole range for truncation. Otherwise split i/o * might happen partly before and partly after the truncation. 
*/ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) goto out1; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(vp); if (vp->v_type == VDIR) { error = EISDIR; goto out; } #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error) goto out; #endif error = vn_truncate_locked(vp, length, (fp->f_flag & O_FSYNC) != 0, fp->f_cred); out: VOP_UNLOCK(vp, 0); vn_finished_write(mp); out1: vn_rangelock_unlock(vp, rl_cookie); return (error); } /* * Truncate a file that is already locked. */ int vn_truncate_locked(struct vnode *vp, off_t length, bool sync, struct ucred *cred) { struct vattr vattr; int error; error = VOP_ADD_WRITECOUNT(vp, 1); if (error == 0) { VATTR_NULL(&vattr); vattr.va_size = length; if (sync) vattr.va_vaflags |= VA_SYNC; error = VOP_SETATTR(vp, &vattr, cred); VOP_ADD_WRITECOUNT_CHECKED(vp, -1); } return (error); } /* * File table vnode stat routine. */ static int vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { struct vnode *vp = fp->f_vnode; int error; vn_lock(vp, LK_SHARED | LK_RETRY); error = vn_stat(vp, sb, active_cred, fp->f_cred, td); VOP_UNLOCK(vp, 0); return (error); } /* * Stat a vnode; implementation for the stat syscall */ int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred, struct ucred *file_cred, struct thread *td) { struct vattr vattr; struct vattr *vap; int error; u_short mode; AUDIT_ARG_VNODE1(vp); #ifdef MAC error = mac_vnode_check_stat(active_cred, file_cred, vp); if (error) return (error); #endif vap = &vattr; /* * Initialize defaults for new and unusual fields, so that file * systems which don't support these fields don't need to know * about them. */ vap->va_birthtime.tv_sec = -1; vap->va_birthtime.tv_nsec = 0; vap->va_fsid = VNOVAL; vap->va_rdev = NODEV; error = VOP_GETATTR(vp, vap, active_cred); if (error) return (error); /* * Zero the spare stat fields */ bzero(sb, sizeof *sb); /* * Copy from vattr table */ if (vap->va_fsid != VNOVAL) sb->st_dev = vap->va_fsid; else sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; sb->st_ino = vap->va_fileid; mode = vap->va_mode; switch (vap->va_type) { case VREG: mode |= S_IFREG; break; case VDIR: mode |= S_IFDIR; break; case VBLK: mode |= S_IFBLK; break; case VCHR: mode |= S_IFCHR; break; case VLNK: mode |= S_IFLNK; break; case VSOCK: mode |= S_IFSOCK; break; case VFIFO: mode |= S_IFIFO; break; default: return (EBADF); } sb->st_mode = mode; sb->st_nlink = vap->va_nlink; sb->st_uid = vap->va_uid; sb->st_gid = vap->va_gid; sb->st_rdev = vap->va_rdev; if (vap->va_size > OFF_MAX) return (EOVERFLOW); sb->st_size = vap->va_size; sb->st_atim = vap->va_atime; sb->st_mtim = vap->va_mtime; sb->st_ctim = vap->va_ctime; sb->st_birthtim = vap->va_birthtime; /* * According to www.opengroup.org, the meaning of st_blksize is * "a filesystem-specific preferred I/O block size for this * object. In some filesystem types, this may vary from file * to file" * Use miminum/default of PAGE_SIZE (e.g. for VCHR). */ sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize); sb->st_flags = vap->va_flags; if (priv_check(td, PRIV_VFS_GENERATION)) sb->st_gen = 0; else sb->st_gen = vap->va_gen; sb->st_blocks = vap->va_bytes / S_BLKSIZE; return (0); } /* * File table vnode ioctl routine. 
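vn_ioctl(), whose body follows, implements FIONREAD for regular files and directories by subtracting the current file offset from va_size, answers FIONBIO/FIOASYNC trivially, and hands everything else to VOP_IOCTL(). A small userland sketch of the FIONREAD case, illustrative only; the file name is hypothetical:

/* Illustrative only: FIONREAD on a vnode, as computed by vn_ioctl() below. */
#include <sys/ioctl.h>
#include <sys/filio.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[100];
	int fd = open("/etc/services", O_RDONLY);	/* hypothetical file */
	int nread;

	if (fd < 0)
		return (1);
	(void)read(fd, buf, sizeof(buf));		/* advance f_offset */
	if (ioctl(fd, FIONREAD, &nread) == 0)
		printf("%d bytes between the current offset and EOF\n", nread);
	close(fd);
	return (0);
}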
*/ static int vn_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { struct vattr vattr; struct vnode *vp; struct fiobmap2_arg *bmarg; int error; vp = fp->f_vnode; switch (vp->v_type) { case VDIR: case VREG: switch (com) { case FIONREAD: vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, active_cred); VOP_UNLOCK(vp, 0); if (error == 0) *(int *)data = vattr.va_size - fp->f_offset; return (error); case FIOBMAP2: bmarg = (struct fiobmap2_arg *)data; vn_lock(vp, LK_SHARED | LK_RETRY); #ifdef MAC error = mac_vnode_check_read(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_BMAP(vp, bmarg->bn, NULL, &bmarg->bn, &bmarg->runp, &bmarg->runb); VOP_UNLOCK(vp, 0); return (error); case FIONBIO: case FIOASYNC: return (0); default: return (VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td)); } break; case VCHR: return (VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td)); default: return (ENOTTY); } } /* * File table vnode poll routine. */ static int vn_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct vnode *vp; int error; vp = fp->f_vnode; #ifdef MAC vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(vp); error = mac_vnode_check_poll(active_cred, fp->f_cred, vp); VOP_UNLOCK(vp, 0); if (!error) #endif error = VOP_POLL(vp, events, fp->f_cred, td); return (error); } /* * Acquire the requested lock and then check for validity. LK_RETRY * permits vn_lock to return doomed vnodes. */ int _vn_lock(struct vnode *vp, int flags, char *file, int line) { int error; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vn_lock: no locktype")); VNASSERT(vp->v_holdcnt != 0, vp, ("vn_lock: zero hold count")); retry: error = VOP_LOCK1(vp, flags, file, line); flags &= ~LK_INTERLOCK; /* Interlock is always dropped. */ KASSERT((flags & LK_RETRY) == 0 || error == 0, ("vn_lock: error %d incompatible with flags %#x", error, flags)); if ((flags & LK_RETRY) == 0) { if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) { VOP_UNLOCK(vp, 0); error = ENOENT; } } else if (error != 0) goto retry; return (error); } /* * File table vnode close routine. */ static int vn_closefile(struct file *fp, struct thread *td) { struct vnode *vp; struct flock lf; int error; bool ref; vp = fp->f_vnode; fp->f_ops = &badfileops; ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE; error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref); if (__predict_false(ref)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); vrele(vp); } return (error); } static bool vn_suspendable(struct mount *mp) { return (mp->mnt_op->vfs_susp_clean != NULL); } /* * Preparing to start a filesystem write operation. If the operation is * permitted, then we bump the count of operations in progress and * proceed. If a suspend request is in progress, we wait until the * suspension is over, and then proceed. */ static int vn_start_write_refed(struct mount *mp, int flags, bool mplocked) { int error, mflags; if (__predict_true(!mplocked) && (flags & V_XSLEEP) == 0 && vfs_op_thread_enter(mp)) { MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) == 0); vfs_mp_count_add_pcpu(mp, writeopcount, 1); vfs_op_thread_exit(mp); return (0); } if (mplocked) mtx_assert(MNT_MTX(mp), MA_OWNED); else MNT_ILOCK(mp); error = 0; /* * Check on status of suspension. */ if ((curthread->td_pflags & TDP_IGNSUSP) == 0 || mp->mnt_susp_owner != curthread) { mflags = ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ? 
(flags & PCATCH) : 0) | (PUSER - 1); while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { if (flags & V_NOWAIT) { error = EWOULDBLOCK; goto unlock; } error = msleep(&mp->mnt_flag, MNT_MTX(mp), mflags, "suspfs", 0); if (error) goto unlock; } } if (flags & V_XSLEEP) goto unlock; mp->mnt_writeopcount++; unlock: if (error != 0 || (flags & V_XSLEEP) != 0) MNT_REL(mp); MNT_IUNLOCK(mp); return (error); } int vn_start_write(struct vnode *vp, struct mount **mpp, int flags) { struct mount *mp; int error; KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL), ("V_MNTREF requires mp")); error = 0; /* * If a vnode is provided, get and return the mount point that * to which it will write. */ if (vp != NULL) { if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { *mpp = NULL; if (error != EOPNOTSUPP) return (error); return (0); } } if ((mp = *mpp) == NULL) return (0); if (!vn_suspendable(mp)) { if (vp != NULL || (flags & V_MNTREF) != 0) vfs_rel(mp); return (0); } /* * VOP_GETWRITEMOUNT() returns with the mp refcount held through * a vfs_ref(). * As long as a vnode is not provided we need to acquire a * refcount for the provided mountpoint too, in order to * emulate a vfs_ref(). */ if (vp == NULL && (flags & V_MNTREF) == 0) vfs_ref(mp); return (vn_start_write_refed(mp, flags, false)); } /* * Secondary suspension. Used by operations such as vop_inactive * routines that are needed by the higher level functions. These * are allowed to proceed until all the higher level functions have * completed (indicated by mnt_writeopcount dropping to zero). At that * time, these operations are halted until the suspension is over. */ int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags) { struct mount *mp; int error; KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL), ("V_MNTREF requires mp")); retry: if (vp != NULL) { if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { *mpp = NULL; if (error != EOPNOTSUPP) return (error); return (0); } } /* * If we are not suspended or have not yet reached suspended * mode, then let the operation proceed. */ if ((mp = *mpp) == NULL) return (0); if (!vn_suspendable(mp)) { if (vp != NULL || (flags & V_MNTREF) != 0) vfs_rel(mp); return (0); } /* * VOP_GETWRITEMOUNT() returns with the mp refcount held through * a vfs_ref(). * As long as a vnode is not provided we need to acquire a * refcount for the provided mountpoint too, in order to * emulate a vfs_ref(). */ MNT_ILOCK(mp); if (vp == NULL && (flags & V_MNTREF) == 0) MNT_REF(mp); if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) { mp->mnt_secondary_writes++; mp->mnt_secondary_accwrites++; MNT_IUNLOCK(mp); return (0); } if (flags & V_NOWAIT) { MNT_REL(mp); MNT_IUNLOCK(mp); return (EWOULDBLOCK); } /* * Wait for the suspension to finish. */ error = msleep(&mp->mnt_flag, MNT_MTX(mp), (PUSER - 1) | PDROP | ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ? (flags & PCATCH) : 0), "suspfs", 0); vfs_rel(mp); if (error == 0) goto retry; return (error); } /* * Filesystem write operation has completed. If we are suspending and this * operation is the last one, notify the suspender that the suspension is * now in effect. 
*/ void vn_finished_write(struct mount *mp) { int c; if (mp == NULL || !vn_suspendable(mp)) return; if (vfs_op_thread_enter(mp)) { vfs_mp_count_sub_pcpu(mp, writeopcount, 1); vfs_mp_count_sub_pcpu(mp, ref, 1); vfs_op_thread_exit(mp); return; } MNT_ILOCK(mp); vfs_assert_mount_counters(mp); MNT_REL(mp); c = --mp->mnt_writeopcount; if (mp->mnt_vfs_ops == 0) { MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) == 0); MNT_IUNLOCK(mp); return; } if (c < 0) vfs_dump_mount_counters(mp); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && c == 0) wakeup(&mp->mnt_writeopcount); MNT_IUNLOCK(mp); } /* * Filesystem secondary write operation has completed. If we are * suspending and this operation is the last one, notify the suspender * that the suspension is now in effect. */ void vn_finished_secondary_write(struct mount *mp) { if (mp == NULL || !vn_suspendable(mp)) return; MNT_ILOCK(mp); MNT_REL(mp); mp->mnt_secondary_writes--; if (mp->mnt_secondary_writes < 0) panic("vn_finished_secondary_write: neg cnt"); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && mp->mnt_secondary_writes <= 0) wakeup(&mp->mnt_secondary_writes); MNT_IUNLOCK(mp); } /* * Request a filesystem to suspend write operations. */ int vfs_write_suspend(struct mount *mp, int flags) { int error; MPASS(vn_suspendable(mp)); vfs_op_enter(mp); MNT_ILOCK(mp); vfs_assert_mount_counters(mp); if (mp->mnt_susp_owner == curthread) { vfs_op_exit_locked(mp); MNT_IUNLOCK(mp); return (EALREADY); } while (mp->mnt_kern_flag & MNTK_SUSPEND) msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0); /* * Unmount holds a write reference on the mount point. If we * own busy reference and drain for writers, we deadlock with * the reference draining in the unmount path. Callers of * vfs_write_suspend() must specify VS_SKIP_UNMOUNT if * vfs_busy() reference is owned and caller is not in the * unmount context. */ if ((flags & VS_SKIP_UNMOUNT) != 0 && (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { vfs_op_exit_locked(mp); MNT_IUNLOCK(mp); return (EBUSY); } mp->mnt_kern_flag |= MNTK_SUSPEND; mp->mnt_susp_owner = curthread; if (mp->mnt_writeopcount > 0) (void) msleep(&mp->mnt_writeopcount, MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0); else MNT_IUNLOCK(mp); if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0) { vfs_write_resume(mp, 0); vfs_op_exit(mp); } return (error); } /* * Request a filesystem to resume write operations. */ void vfs_write_resume(struct mount *mp, int flags) { MPASS(vn_suspendable(mp)); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner")); mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 | MNTK_SUSPENDED); mp->mnt_susp_owner = NULL; wakeup(&mp->mnt_writeopcount); wakeup(&mp->mnt_flag); curthread->td_pflags &= ~TDP_IGNSUSP; if ((flags & VR_START_WRITE) != 0) { MNT_REF(mp); mp->mnt_writeopcount++; } MNT_IUNLOCK(mp); if ((flags & VR_NO_SUSPCLR) == 0) VFS_SUSP_CLEAN(mp); vfs_op_exit(mp); } else if ((flags & VR_START_WRITE) != 0) { MNT_REF(mp); vn_start_write_refed(mp, 0, true); } else { MNT_IUNLOCK(mp); } } /* * Helper loop around vfs_write_suspend() for filesystem unmount VFS * methods. */ int vfs_write_suspend_umnt(struct mount *mp) { int error; MPASS(vn_suspendable(mp)); KASSERT((curthread->td_pflags & TDP_IGNSUSP) == 0, ("vfs_write_suspend_umnt: recursed")); /* dounmount() already called vn_start_write(). 
*/ for (;;) { vn_finished_write(mp); error = vfs_write_suspend(mp, 0); if (error != 0) { vn_start_write(NULL, &mp, V_WAIT); return (error); } MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_SUSPENDED) != 0) break; MNT_IUNLOCK(mp); vn_start_write(NULL, &mp, V_WAIT); } mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); wakeup(&mp->mnt_flag); MNT_IUNLOCK(mp); curthread->td_pflags |= TDP_IGNSUSP; return (0); } /* * Implement kqueues for files by translating it to vnode operation. */ static int vn_kqfilter(struct file *fp, struct knote *kn) { return (VOP_KQFILTER(fp->f_vnode, kn)); } /* * Simplified in-kernel wrapper calls for extended attribute access. * Both calls pass in a NULL credential, authorizing as "kernel" access. * Set IO_NODELOCKED in ioflg if the vnode is already locked. */ int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int *buflen, char *buf, struct thread *td) { struct uio auio; struct iovec iov; int error; iov.iov_len = *buflen; iov.iov_base = buf; auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = *buflen; if ((ioflg & IO_NODELOCKED) == 0) vn_lock(vp, LK_SHARED | LK_RETRY); ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute retrieval as kernel */ error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) VOP_UNLOCK(vp, 0); if (error == 0) { *buflen = *buflen - auio.uio_resid; } return (error); } /* * XXX failure mode if partially written? */ int vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int buflen, char *buf, struct thread *td) { struct uio auio; struct iovec iov; struct mount *mp; int error; iov.iov_len = buflen; iov.iov_base = buf; auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = buflen; if ((ioflg & IO_NODELOCKED) == 0) { if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute setting as kernel */ error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) { vn_finished_write(mp); VOP_UNLOCK(vp, 0); } return (error); } int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; if ((ioflg & IO_NODELOCKED) == 0) { if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute removal as kernel */ error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) { vn_finished_write(mp); VOP_UNLOCK(vp, 0); } return (error); } static int vn_get_ino_alloc_vget(struct mount *mp, void *arg, int lkflags, struct vnode **rvp) { return (VFS_VGET(mp, *(ino_t *)arg, lkflags, rvp)); } int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp) { return (vn_vget_ino_gen(vp, vn_get_ino_alloc_vget, &ino, lkflags, rvp)); } int vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc, void *alloc_arg, int lkflags, struct vnode **rvp) { struct mount *mp; int ltype, error; ASSERT_VOP_LOCKED(vp, 
"vn_vget_ino_get"); mp = vp->v_mount; ltype = VOP_ISLOCKED(vp); KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED, ("vn_vget_ino: vp not locked")); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vp, 0); error = vfs_busy(mp, 0); vn_lock(vp, ltype | LK_RETRY); vfs_rel(mp); if (error != 0) return (ENOENT); if (vp->v_iflag & VI_DOOMED) { vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp, 0); error = alloc(mp, alloc_arg, lkflags, rvp); vfs_unbusy(mp); if (error != 0 || *rvp != vp) vn_lock(vp, ltype | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { if (error == 0) { if (*rvp == vp) vunref(vp); else vput(*rvp); } error = ENOENT; } return (error); } int vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, struct thread *td) { if (vp->v_type != VREG || td == NULL) return (0); if ((uoff_t)uio->uio_offset + uio->uio_resid > lim_cur(td, RLIMIT_FSIZE)) { PROC_LOCK(td->td_proc); kern_psignal(td->td_proc, SIGXFSZ); PROC_UNLOCK(td->td_proc); return (EFBIG); } return (0); } int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { struct vnode *vp; vp = fp->f_vnode; #ifdef AUDIT vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif return (setfmode(td, active_cred, vp, mode)); } int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { struct vnode *vp; vp = fp->f_vnode; #ifdef AUDIT vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif return (setfown(td, active_cred, vp, uid, gid)); } void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) { vm_object_t object; if ((object = vp->v_object) == NULL) return; VM_OBJECT_WLOCK(object); vm_object_page_remove(object, start, end, 0); VM_OBJECT_WUNLOCK(object); } int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) { struct vattr va; daddr_t bn, bnp; uint64_t bsize; off_t noff; int error; KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, ("Wrong command %lu", cmd)); if (vn_lock(vp, LK_SHARED) != 0) return (EBADF); if (vp->v_type != VREG) { error = ENOTTY; goto unlock; } error = VOP_GETATTR(vp, &va, cred); if (error != 0) goto unlock; noff = *off; if (noff >= va.va_size) { error = ENXIO; goto unlock; } bsize = vp->v_mount->mnt_stat.f_iosize; for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize - noff % bsize) { error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); if (error == EOPNOTSUPP) { error = ENOTTY; goto unlock; } if ((bnp == -1 && cmd == FIOSEEKHOLE) || (bnp != -1 && cmd == FIOSEEKDATA)) { noff = bn * bsize; if (noff < *off) noff = *off; goto unlock; } } if (noff > va.va_size) noff = va.va_size; /* noff == va.va_size. There is an implicit hole at the end of file. 
*/ if (cmd == FIOSEEKDATA) error = ENXIO; unlock: VOP_UNLOCK(vp, 0); if (error == 0) *off = noff; return (error); } int vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) { struct ucred *cred; struct vnode *vp; struct vattr vattr; off_t foffset, size; int error, noneg; cred = td->td_ucred; vp = fp->f_vnode; foffset = foffset_lock(fp, 0); noneg = (vp->v_type != VCHR); error = 0; switch (whence) { case L_INCR: if (noneg && (foffset < 0 || (offset > 0 && foffset > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += foffset; break; case L_XTND: vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); if (error) break; /* * If the file references a disk device, then fetch * the media size and use that to determine the ending * offset. */ if (vattr.va_size == 0 && vp->v_type == VCHR && fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) vattr.va_size = size; if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += vattr.va_size; break; case L_SET: break; case SEEK_DATA: error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); if (error == ENOTTY) error = EINVAL; break; case SEEK_HOLE: error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); if (error == ENOTTY) error = EINVAL; break; default: error = EINVAL; } if (error == 0 && noneg && offset < 0) error = EINVAL; if (error != 0) goto drop; VFS_KNOTE_UNLOCKED(vp, 0); td->td_uretoff.tdu_off = offset; drop: foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); return (error); } int vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { int error; /* * Grant permission if the caller is the owner of the file, or * the super-user, or has ACL_WRITE_ATTRIBUTES permission on * on the file. If the time pointer is null, then write * permission on the file is also sufficient. * * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes: * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES * will be allowed to set the times [..] to the current * server time. */ error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); if (error != 0 && (vap->va_vaflags & VA_UTIMES_NULL) != 0) error = VOP_ACCESS(vp, VWRITE, cred, td); return (error); } int vn_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct vnode *vp; int error; if (fp->f_type == DTYPE_FIFO) kif->kf_type = KF_TYPE_FIFO; else kif->kf_type = KF_TYPE_VNODE; vp = fp->f_vnode; vref(vp); FILEDESC_SUNLOCK(fdp); error = vn_fill_kinfo_vnode(vp, kif); vrele(vp); FILEDESC_SLOCK(fdp); return (error); } static inline void vn_fill_junk(struct kinfo_file *kif) { size_t len, olen; /* * Simulate vn_fullpath returning changing values for a given * vp during e.g. coredump. */ len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1; olen = strlen(kif->kf_path); if (len < olen) strcpy(&kif->kf_path[len - 1], "$"); else for (; olen < len; olen++) strcpy(&kif->kf_path[olen], "A"); } int vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif) { struct vattr va; char *fullpath, *freepath; int error; kif->kf_un.kf_file.kf_file_type = vntype_to_kinfo(vp->v_type); freepath = NULL; fullpath = "-"; error = vn_fullpath(curthread, vp, &fullpath, &freepath); if (error == 0) { strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path)); } if (freepath != NULL) free(freepath, M_TEMP); KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path, vn_fill_junk(kif); ); /* * Retrieve vnode attributes. 
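vn_bmap_seekhole() and the SEEK_DATA/SEEK_HOLE cases of vn_seek() above are what give lseek(2) its hole-probing behavior, including the implicit hole at end of file. A small userland sketch, illustrative only and not part of this change; the file name is hypothetical:

/* Illustrative only: SEEK_HOLE/SEEK_DATA as implemented by vn_bmap_seekhole(). */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tmp/sparse.example", O_RDONLY);	/* hypothetical sparse file */
	off_t data, hole;

	if (fd < 0)
		return (1);
	data = lseek(fd, 0, SEEK_DATA);	/* ENXIO if no data at or after offset 0 */
	hole = lseek(fd, 0, SEEK_HOLE);	/* EOF counts as an implicit hole */
	printf("first data at %jd, first hole at %jd\n",
	    (intmax_t)data, (intmax_t)hole);
	close(fd);
	return (0);
}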
*/ va.va_fsid = VNOVAL; va.va_rdev = NODEV; vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &va, curthread->td_ucred); VOP_UNLOCK(vp, 0); if (error != 0) return (error); if (va.va_fsid != VNOVAL) kif->kf_un.kf_file.kf_file_fsid = va.va_fsid; else kif->kf_un.kf_file.kf_file_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; kif->kf_un.kf_file.kf_file_fsid_freebsd11 = kif->kf_un.kf_file.kf_file_fsid; /* truncate */ kif->kf_un.kf_file.kf_file_fileid = va.va_fileid; kif->kf_un.kf_file.kf_file_mode = MAKEIMODE(va.va_type, va.va_mode); kif->kf_un.kf_file.kf_file_size = va.va_size; kif->kf_un.kf_file.kf_file_rdev = va.va_rdev; kif->kf_un.kf_file.kf_file_rdev_freebsd11 = kif->kf_un.kf_file.kf_file_rdev; /* truncate */ return (0); } int vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { #ifdef HWPMC_HOOKS struct pmckern_map_in pkm; #endif struct mount *mp; struct vnode *vp; vm_object_t object; vm_prot_t maxprot; boolean_t writecounted; int error; #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) /* * POSIX shared-memory objects are defined to have * kernel persistence, and are not defined to support * read(2)/write(2) -- or even open(2). Thus, we can * use MAP_ASYNC to trade on-disk coherence for speed. * The shm_open(3) library routine turns on the FPOSIXSHM * flag to request this behavior. */ if ((fp->f_flag & FPOSIXSHM) != 0) flags |= MAP_NOSYNC; #endif vp = fp->f_vnode; /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ mp = vp->v_mount; if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { maxprot = VM_PROT_NONE; if ((prot & VM_PROT_EXECUTE) != 0) return (EACCES); } else maxprot = VM_PROT_EXECUTE; if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_READ; else if ((prot & VM_PROT_READ) != 0) return (EACCES); /* * If we are sharing potential changes via MAP_SHARED and we * are trying to get write permission although we opened it * without asking for it, bail out. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) maxprot |= VM_PROT_WRITE; else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } else { maxprot |= VM_PROT_WRITE; cap_maxprot |= VM_PROT_WRITE; } maxprot &= cap_maxprot; /* * For regular files and shared memory, POSIX requires that * the value of foff be a legitimate offset within the data * object. In particular, negative offsets are invalid. * Blocking negative offsets and overflows here avoids * possible wraparound or user-level access into reserved * ranges of the data object later. In contrast, POSIX does * not dictate how offsets are used by device drivers, so in * the case of a device mapping a negative offset is passed * on. */ if ( #ifdef _LP64 size > OFF_MAX || #endif foff < 0 || foff > OFF_MAX - size) return (EINVAL); writecounted = FALSE; error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp, &foff, &object, &writecounted); if (error != 0) return (error); error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, writecounted, td); if (error != 0) { /* * If this mapping was accounted for in the vnode's * writecount, then undo that now. 
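The maxprot computation in vn_mmap() above is what makes a MAP_SHARED, PROT_WRITE mapping of a read-only descriptor fail with EACCES, while the same request with MAP_PRIVATE is allowed because writes go to anonymous copy-on-write pages. A hedged userland sketch, illustrative only; the file name is hypothetical:

/* Illustrative only: the EACCES case enforced by vn_mmap()'s maxprot logic. */
#include <sys/mman.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/etc/motd", O_RDONLY);	/* hypothetical read-only open */
	void *p;

	if (fd < 0)
		return (1);
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		printf("MAP_SHARED rw on O_RDONLY fd: %s\n", strerror(errno)); /* EACCES */
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (p != MAP_FAILED)
		printf("MAP_PRIVATE rw on O_RDONLY fd: allowed (copy-on-write)\n");
	close(fd);
	return (0);
}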
*/ if (writecounted) vm_pager_release_writecount(object, 0, size); vm_object_deallocate(object); } #ifdef HWPMC_HOOKS /* Inform hwpmc(4) if an executable is being mapped. */ if (PMC_HOOK_INSTALLED(PMC_FN_MMAP)) { if ((prot & VM_PROT_EXECUTE) != 0 && error == 0) { pkm.pm_file = vp; pkm.pm_address = (uintptr_t) *addr; PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_MMAP, (void *) &pkm); } } #endif return (error); } void vn_fsid(struct vnode *vp, struct vattr *va) { fsid_t *f; f = &vp->v_mount->mnt_stat.f_fsid; va->va_fsid = (uint32_t)f->val[1]; va->va_fsid <<= sizeof(f->val[1]) * NBBY; va->va_fsid += (uint32_t)f->val[0]; } int vn_fsync_buf(struct vnode *vp, int waitfor) { struct buf *bp, *nbp; struct bufobj *bo; struct mount *mp; int error, maxretry; error = 0; maxretry = 10000; /* large, arbitrarily chosen */ mp = NULL; if (vp->v_type == VCHR) { VI_LOCK(vp); mp = vp->v_rdev->si_mountpt; VI_UNLOCK(vp); } bo = &vp->v_bufobj; BO_LOCK(bo); loop1: /* * MARK/SCAN initialization to avoid infinite loops. */ TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { bp->b_vflags &= ~BV_SCANNED; bp->b_error = 0; } /* * Flush all dirty buffers associated with a vnode. */ loop2: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if ((bp->b_vflags & BV_SCANNED) != 0) continue; bp->b_vflags |= BV_SCANNED; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL, BO_LOCKPTR(bo)) != 0) { BO_LOCK(bo); goto loop1; } BO_LOCK(bo); } BO_UNLOCK(bo); KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); if ((bp->b_flags & B_DELWRI) == 0) panic("fsync: not dirty"); if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { vfs_bio_awrite(bp); } else { bremfree(bp); bawrite(bp); } if (maxretry < 1000) pause("dirty", hz < 1000 ? 1 : hz / 1000); BO_LOCK(bo); goto loop2; } /* * If synchronous the caller expects us to completely resolve all * dirty buffers in the system. Wait for in-progress I/O to * complete (which could include background bitmap writes), then * retry if dirty blocks still exist. */ if (waitfor == MNT_WAIT) { bufobj_wwait(bo, 0, 0); if (bo->bo_dirty.bv_cnt > 0) { /* * If we are unable to write any of these buffers * then we fail now rather than trying endlessly * to write them out. */ TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) if ((error = bp->b_error) != 0) break; if ((mp != NULL && mp->mnt_secondary_writes > 0) || (error == 0 && --maxretry >= 0)) goto loop1; if (error == 0) error = EAGAIN; } } BO_UNLOCK(bo); if (error != 0) vn_printf(vp, "fsync: giving up on dirty (error = %d) ", error); return (error); } /* * Copies a byte range from invp to outvp. Calls VOP_COPY_FILE_RANGE() * or vn_generic_copy_file_range() after rangelocking the byte ranges, * to do the actual copy. * vn_generic_copy_file_range() is factored out, so it can be called * from a VOP_COPY_FILE_RANGE() call as well, but handles vnodes from * different file systems. */ int vn_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td) { struct vattr va; int error; size_t len; uint64_t uvalin, uvalout; len = *lenp; *lenp = 0; /* For error returns. */ error = 0; /* Do some sanity checks on the arguments. 
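vn_copy_file_range() above backs the copy_file_range(2) system call; the sanity checks that follow reject directories (EISDIR), negative or overflowing offsets and non-regular files (EINVAL), and copying a file onto itself (EBADF). A minimal userland sketch of the call, illustrative only and not part of this change; the file names are hypothetical:

/* Illustrative only: driving vn_copy_file_range() via copy_file_range(2). */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int infd = open("/tmp/in.example", O_RDONLY);	/* hypothetical names */
	int outfd = open("/tmp/out.example", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	off_t inoff = 0, outoff = 0;
	ssize_t n;

	if (infd < 0 || outfd < 0)
		return (1);
	/* Copy up to 1 MiB starting at the given offsets; the offsets are
	 * advanced by the number of bytes actually copied. */
	n = copy_file_range(infd, &inoff, outfd, &outoff, 1024 * 1024, 0);
	printf("copied %zd bytes, inoff=%jd outoff=%jd\n",
	    n, (intmax_t)inoff, (intmax_t)outoff);
	return (0);
}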
*/ uvalin = *inoffp; uvalin += len; uvalout = *outoffp; uvalout += len; if (invp->v_type == VDIR || outvp->v_type == VDIR) error = EISDIR; else if (*inoffp < 0 || uvalin > INT64_MAX || uvalin < (uint64_t)*inoffp || *outoffp < 0 || uvalout > INT64_MAX || uvalout < (uint64_t)*outoffp || invp->v_type != VREG || outvp->v_type != VREG) error = EINVAL; else if (invp == outvp) error = EBADF; if (error != 0) goto out; error = vn_lock(invp, LK_SHARED); if (error != 0) goto out; /* Check that the offset + len does not go past EOF of invp. */ error = VOP_GETATTR(invp, &va, incred); if (error == 0 && va.va_size < *inoffp + len) error = EINVAL; VOP_UNLOCK(invp, 0); if (error != 0) goto out; /* * If the two vnode are for the same file system, call * VOP_COPY_FILE_RANGE(), otherwise call vn_generic_copy_file_range() * which can handle copies across multiple file systems. */ *lenp = len; if (invp->v_mount == outvp->v_mount) error = VOP_COPY_FILE_RANGE(invp, inoffp, outvp, outoffp, lenp, flags, incred, outcred, fsize_td); else error = vn_generic_copy_file_range(invp, inoffp, outvp, outoffp, lenp, flags, incred, outcred, fsize_td); out: return (error); } /* * Test len bytes of data starting at dat for all bytes == 0. * Return true if all bytes are zero, false otherwise. * Expects dat to be well aligned. */ static bool mem_iszero(void *dat, int len) { int i; const u_int *p; const char *cp; for (p = dat; len > 0; len -= sizeof(*p), p++) { if (len >= sizeof(*p)) { if (*p != 0) return (false); } else { cp = (const char *)p; for (i = 0; i < len; i++, cp++) if (*cp != '\0') return (false); } } return (true); } /* * Look for a hole in the output file and, if found, adjust *outoffp * and *xferp to skip past the hole. * *xferp is the entire hole length to be written and xfer2 is how many bytes * to be written as 0's upon return. */ static off_t vn_skip_hole(struct vnode *outvp, off_t xfer2, off_t *outoffp, off_t *xferp, off_t *dataoffp, off_t *holeoffp, struct ucred *cred) { int error; off_t delta; if (*holeoffp == 0 || *holeoffp <= *outoffp) { *dataoffp = *outoffp; error = VOP_IOCTL(outvp, FIOSEEKDATA, dataoffp, 0, cred, curthread); if (error == 0) { *holeoffp = *dataoffp; error = VOP_IOCTL(outvp, FIOSEEKHOLE, holeoffp, 0, cred, curthread); } if (error != 0 || *holeoffp == *dataoffp) { /* * Since outvp is unlocked, it may be possible for * another thread to do a truncate(), lseek(), write() * creating a hole at startoff between the above * VOP_IOCTL() calls, if the other thread does not do * rangelocking. * If that happens, *holeoffp == *dataoffp and finding * the hole has failed, so disable vn_skip_hole(). */ *holeoffp = -1; /* Disable use of vn_skip_hole(). */ return (xfer2); } KASSERT(*dataoffp >= *outoffp, ("vn_skip_hole: dataoff=%jd < outoff=%jd", (intmax_t)*dataoffp, (intmax_t)*outoffp)); KASSERT(*holeoffp > *dataoffp, ("vn_skip_hole: holeoff=%jd <= dataoff=%jd", (intmax_t)*holeoffp, (intmax_t)*dataoffp)); } /* * If there is a hole before the data starts, advance *outoffp and * *xferp past the hole. */ if (*dataoffp > *outoffp) { delta = *dataoffp - *outoffp; if (delta >= *xferp) { /* Entire *xferp is a hole. */ *outoffp += *xferp; *xferp = 0; return (0); } *xferp -= delta; *outoffp += delta; xfer2 = MIN(xfer2, *xferp); } /* * If a hole starts before the end of this xfer2, reduce this xfer2 so * that the write ends at the start of the hole. * *holeoffp should always be greater than *outoffp, but for the * non-INVARIANTS case, check this to make sure xfer2 remains a sane * value. 
*/ if (*holeoffp > *outoffp && *holeoffp < *outoffp + xfer2) xfer2 = *holeoffp - *outoffp; return (xfer2); } /* * Write an xfer sized chunk to outvp in blksize blocks from dat. * dat is a maximum of blksize in length and can be written repeatedly in * the chunk. * If growfile == true, just grow the file via vn_truncate_locked() instead * of doing actual writes. * If checkhole == true, a hole is being punched, so skip over any hole * already in the output file. */ static int vn_write_outvp(struct vnode *outvp, char *dat, off_t outoff, off_t xfer, u_long blksize, bool growfile, bool checkhole, struct ucred *cred) { struct mount *mp; off_t dataoff, holeoff, xfer2; int error, lckf; /* * Loop around doing writes of blksize until write has been completed. * Lock/unlock on each loop iteration so that a bwillwrite() can be * done for each iteration, since the xfer argument can be very * large if there is a large hole to punch in the output file. */ error = 0; holeoff = 0; do { xfer2 = MIN(xfer, blksize); if (checkhole) { /* * Punching a hole. Skip writing if there is * already a hole in the output file. */ xfer2 = vn_skip_hole(outvp, xfer2, &outoff, &xfer, &dataoff, &holeoff, cred); if (xfer == 0) break; if (holeoff < 0) checkhole = false; KASSERT(xfer2 > 0, ("vn_write_outvp: xfer2=%jd", (intmax_t)xfer2)); } bwillwrite(); mp = NULL; error = vn_start_write(outvp, &mp, V_WAIT); if (error == 0) { if (MNT_SHARED_WRITES(mp)) lckf = LK_SHARED; else lckf = LK_EXCLUSIVE; error = vn_lock(outvp, lckf); } if (error == 0) { if (growfile) error = vn_truncate_locked(outvp, outoff + xfer, false, cred); else { error = vn_rdwr(UIO_WRITE, outvp, dat, xfer2, outoff, UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, cred, NULL, curthread); outoff += xfer2; xfer -= xfer2; } VOP_UNLOCK(outvp, 0); } if (mp != NULL) vn_finished_write(mp); } while (!growfile && xfer > 0 && error == 0); return (error); } /* * Copy a byte range of one file to another. This function can handle the * case where invp and outvp are on different file systems. * It can also be called by a VOP_COPY_FILE_RANGE() to do the work, if there * is no better file system specific way to do it. */ int vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td) { struct vattr va; struct mount *mp; struct uio io; off_t startoff, endoff, xfer, xfer2; u_long blksize; int error; bool cantseek, readzeros; ssize_t aresid; size_t copylen, len, savlen; char *dat; long holein, holeout; holein = holeout = 0; savlen = len = *lenp; error = 0; dat = NULL; error = vn_lock(invp, LK_SHARED); if (error != 0) goto out; if (VOP_PATHCONF(invp, _PC_MIN_HOLE_SIZE, &holein) != 0) holein = 0; VOP_UNLOCK(invp, 0); mp = NULL; error = vn_start_write(outvp, &mp, V_WAIT); if (error == 0) error = vn_lock(outvp, LK_EXCLUSIVE); if (error == 0) { /* * If fsize_td != NULL, do a vn_rlimit_fsize() call, * now that outvp is locked. */ if (fsize_td != NULL) { io.uio_offset = *outoffp; io.uio_resid = len; error = vn_rlimit_fsize(outvp, &io, fsize_td); if (error != 0) error = EFBIG; } if (VOP_PATHCONF(outvp, _PC_MIN_HOLE_SIZE, &holeout) != 0) holeout = 0; /* * Holes that are past EOF do not need to be written as a block * of zero bytes. So, truncate the output file as far as * possible and then use va.va_size to decide if writing 0 * bytes is necessary in the loop below. 
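The VOP_PATHCONF(_PC_MIN_HOLE_SIZE) probes above decide whether the copy loop can use FIOSEEKDATA/FIOSEEKHOLE at all, and the larger of the two values later feeds the blksize selection (clamped between 4 KiB and 1 MiB). The same value is visible from userland; a small sketch, illustrative only, with a hypothetical path:

/* Illustrative only: querying the hole granularity consulted above. */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	long hole;

	errno = 0;
	hole = pathconf("/tmp/in.example", _PC_MIN_HOLE_SIZE);	/* hypothetical path */
	if (hole > 0)
		printf("holes reported at %ld-byte granularity\n", hole);
	else if (hole == 0 || errno == EINVAL)
		printf("file system does not report holes\n");
	else
		perror("pathconf");
	return (0);
}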
*/ if (error == 0) error = VOP_GETATTR(outvp, &va, outcred); if (error == 0 && va.va_size > *outoffp && va.va_size <= *outoffp + len) { #ifdef MAC error = mac_vnode_check_write(curthread->td_ucred, outcred, outvp); if (error == 0) #endif error = vn_truncate_locked(outvp, *outoffp, false, outcred); if (error == 0) va.va_size = *outoffp; } VOP_UNLOCK(outvp, 0); } if (mp != NULL) vn_finished_write(mp); if (error != 0) goto out; /* * Set the blksize to the larger of the hole sizes for invp and outvp. * If hole sizes aren't available, set the blksize to the larger * f_iosize of invp and outvp. * This code expects the hole sizes and f_iosizes to be powers of 2. * This value is clipped at 4Kbytes and 1Mbyte. */ blksize = MAX(holein, holeout); if (blksize == 0) blksize = MAX(invp->v_mount->mnt_stat.f_iosize, outvp->v_mount->mnt_stat.f_iosize); if (blksize < 4096) blksize = 4096; else if (blksize > 1024 * 1024) blksize = 1024 * 1024; dat = malloc(blksize, M_TEMP, M_WAITOK); /* * If VOP_IOCTL(FIOSEEKHOLE) works for invp, use it and FIOSEEKDATA * to find holes. Otherwise, just scan the read block for all 0s * in the inner loop where the data copying is done. * Note that some file systems such as NFSv3, NFSv4.0 and NFSv4.1 may * support holes on the server, but do not support FIOSEEKHOLE. */ while (len > 0 && error == 0) { endoff = 0; /* To shut up compilers. */ cantseek = true; startoff = *inoffp; copylen = len; /* * Find the next data area. If there is just a hole to EOF, * FIOSEEKDATA should fail and then we drop down into the * inner loop and create the hole on the outvp file. * (I do not know if any file system will report a hole to * EOF via FIOSEEKHOLE, but I am pretty sure FIOSEEKDATA * will fail for those file systems.) * * For input files that don't support FIOSEEKDATA/FIOSEEKHOLE, * the code just falls through to the inner copy loop. */ error = EINVAL; if (holein > 0) error = VOP_IOCTL(invp, FIOSEEKDATA, &startoff, 0, incred, curthread); if (error == 0) { endoff = startoff; error = VOP_IOCTL(invp, FIOSEEKHOLE, &endoff, 0, incred, curthread); /* * Since invp is unlocked, it may be possible for * another thread to do a truncate(), lseek(), write() * creating a hole at startoff between the above * VOP_IOCTL() calls, if the other thread does not do * rangelocking. * If that happens, startoff == endoff and finding * the hole has failed, so set an error. */ if (error == 0 && startoff == endoff) error = EINVAL; /* Any error. Reset to 0. */ } if (error == 0) { if (startoff > *inoffp) { /* Found hole before data block. */ xfer = MIN(startoff - *inoffp, len); if (*outoffp < va.va_size) { /* Must write 0s to punch hole. */ xfer2 = MIN(va.va_size - *outoffp, xfer); memset(dat, 0, MIN(xfer2, blksize)); error = vn_write_outvp(outvp, dat, *outoffp, xfer2, blksize, false, holeout > 0, outcred); } if (error == 0 && *outoffp + xfer > va.va_size && xfer == len) /* Grow last block. */ error = vn_write_outvp(outvp, dat, *outoffp, xfer, blksize, true, false, outcred); if (error == 0) { *inoffp += xfer; *outoffp += xfer; len -= xfer; } } copylen = MIN(len, endoff - startoff); cantseek = false; } else { cantseek = true; startoff = *inoffp; copylen = len; error = 0; } xfer = blksize; if (cantseek) { /* * Set first xfer to end at a block boundary, so that * holes are more likely detected in the loop below via * the for all bytes 0 method. */ xfer -= (*inoffp % blksize); } /* Loop copying the data block. 
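 * When the input file system cannot report holes (cantseek), the first
 * read is trimmed so that later reads start on a blksize boundary.
 * For example, with *inoffp = 5000 and blksize = 4096 the first xfer is
 * 4096 - (5000 % 4096) = 3192 bytes and the next read starts at offset
 * 8192; an aligned read that comes back all zero can then be treated
 * as a hole by mem_iszero().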
*/ while (copylen > 0 && error == 0) { if (copylen < xfer) xfer = copylen; error = vn_lock(invp, LK_SHARED); if (error != 0) goto out; error = vn_rdwr(UIO_READ, invp, dat, xfer, startoff, UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, incred, &aresid, curthread); VOP_UNLOCK(invp, 0); /* * Linux considers a range that exceeds EOF to * be an error, so we will too. */ if (error == 0 && aresid > 0) error = EINVAL; if (error == 0) { /* * Skip the write for holes past the initial EOF * of the output file, unless this is the last * write of the output file at EOF. */ readzeros = cantseek ? mem_iszero(dat, xfer) : false; if (!cantseek || *outoffp < va.va_size || xfer == len || !readzeros) error = vn_write_outvp(outvp, dat, *outoffp, xfer, blksize, readzeros && xfer == len && *outoffp >= va.va_size, false, outcred); if (error == 0) { *inoffp += xfer; startoff += xfer; *outoffp += xfer; copylen -= xfer; len -= xfer; } } xfer = blksize; } } out: *lenp = savlen - len; free(dat, M_TEMP); return (error); } Index: projects/clang900-import/sys/net/if.c =================================================================== --- projects/clang900-import/sys/net/if.c (revision 352536) +++ projects/clang900-import/sys/net/if.c (revision 352537) @@ -1,4611 +1,4612 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include /* * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name * and ifr_ifru when it is used in SIOCGIFCONF. */ _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) == offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru"); __read_mostly epoch_t net_epoch_preempt; __read_mostly epoch_t net_epoch; #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); struct ifgroupreq32 { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; uint32_t ifgru_groups; } ifgr_ifgru; }; struct ifmediareq32 { char ifm_name[IFNAMSIZ]; int ifm_current; int ifm_mask; int ifm_status; int ifm_active; int ifm_count; uint32_t ifm_ulist; /* (int *) */ }; #define SIOCGIFMEDIA32 _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32) #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32) #define _CASE_IOC_IFGROUPREQ_32(cmd) \ _IOC_NEWTYPE((cmd), struct ifgroupreq32): case #else /* !COMPAT_FREEBSD32 */ #define _CASE_IOC_IFGROUPREQ_32(cmd) #endif /* !COMPAT_FREEBSD32 */ #define CASE_IOC_IFGROUPREQ(cmd) \ _CASE_IOC_IFGROUPREQ_32(cmd) \ (cmd) union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; union ifgroupreq_union { struct ifgroupreq ifgr; #ifdef COMPAT_FREEBSD32 struct ifgroupreq32 ifgr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; 
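/*
 * The knobs above are exported through sysctl(3); the link-state
 * logging switch, for instance, shows up as
 * net.link.log_link_state_change.  A minimal userland sketch for
 * reading it, assuming only the standard sysctlbyname(3) interface
 * (illustration only, not part of this file):
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int v;
 *		size_t len = sizeof(v);
 *
 *		if (sysctlbyname("net.link.log_link_state_change",
 *		    &v, &len, NULL, 0) == 0)
 *			printf("log_link_state_change: %d\n", v);
 *		return (0);
 *	}
 */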
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *, bool); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. */ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void *if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static int if_detach_internal(struct ifnet *, int, struct if_clone **); #ifdef VIMAGE static void if_vmove(struct ifnet *, struct vnet *); #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* ipsec helper hooks */ VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); VNET_DEFINE(int, if_index); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); VNET_DEFINE_STATIC(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and * an rwlock. Either may be acquired shared to stablize the list, but both * must be acquired writable to modify the list. 
This model allows us to * both stablize the interface list during interrupt thread processing, but * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for * use, so will use this pointer value to indicate reservation. */ #define IFNET_HOLD (void *)(uintptr_t)(-1) static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex_locked(u_short idx) { if (idx > V_if_index) return (NULL); if (V_ifindex_table[idx] == IFNET_HOLD) return (NULL); return (V_ifindex_table[idx]); } struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; ifp = ifnet_byindex_locked(idx); return (ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); ifp = ifnet_byindex_locked(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) { NET_EPOCH_EXIT(et); return (NULL); } if_ref(ifp); NET_EPOCH_EXIT(et); return (ifp); } /* * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ static u_short ifindex_alloc(void **old) { u_short idx; IFNET_WLOCK_ASSERT(); /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. */ if (idx >= V_if_indexlim) { *old = if_grow(); return (USHRT_MAX); } if (idx > V_if_index) V_if_index = idx; return (idx); } static void ifindex_free_locked(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } static void ifindex_free(u_short idx) { IFNET_WLOCK(); ifindex_free_locked(idx); IFNET_WUNLOCK(); } static void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { V_ifindex_table[idx] = ifp; } struct ifaddr * ifaddr_byindex(u_short idx) { struct epoch_tracker et; struct ifnet *ifp; struct ifaddr *ifa = NULL; NET_EPOCH_ENTER(et); ifp = ifnet_byindex_locked(idx); if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); NET_EPOCH_EXIT(et); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. 
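 * For example, code that only has an interface index and needs the
 * ifnet to stay around uses the _ref variant and drops the reference
 * when done (a sketch of the usual pattern, not a specific caller):
 *
 *	struct ifnet *ifp;
 *
 *	ifp = ifnet_byindex_ref(idx);
 *	if (ifp == NULL)
 *		return (ENXIO);
 *	... use ifp ...
 *	if_rele(ifp);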
*/ static void vnet_if_init(const void *unused __unused) { void *old; CK_STAILQ_INIT(&V_ifnet); CK_STAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); old = if_grow(); /* create initial table */ IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; /* Return all inherited interfaces to their parent vnets. */ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) if_vmove(ifp, ifp->if_home_vnet); } } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif static void * if_grow(void) { int oldlim; u_int n; struct ifnet **e; void *old; old = NULL; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; IFNET_WUNLOCK(); n = (oldlim << 1) * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); IFNET_WLOCK(); if (V_if_indexlim != oldlim) { free(e, M_IFNET); return (NULL); } if (V_ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); old = V_ifindex_table; } V_if_indexlim <<= 1; V_ifindex_table = e; return (old); } /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ struct ifnet * if_alloc_domain(u_char type, int numa_domain) { struct ifnet *ifp; u_short idx; void *old; KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); if (numa_domain == IF_NODOM) ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO); else ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET, DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO); restart: IFNET_WLOCK(); idx = ifindex_alloc(&old); if (__predict_false(idx == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(idx, IFNET_HOLD); IFNET_WUNLOCK(); ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; ifp->if_numa_domain = numa_domain; #ifdef VIMAGE ifp->if_vnet = curvnet; #endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { free(ifp, M_IFNET); ifindex_free(idx); return (NULL); } } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); CK_STAILQ_INIT(&ifp->if_addrhead); CK_STAILQ_INIT(&ifp->if_multiaddrs); CK_STAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. 
*/ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; ifnet_setbyindex(ifp->if_index, ifp); return (ifp); } struct ifnet * if_alloc_dev(u_char type, device_t dev) { int numa_domain; if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0) return (if_alloc_domain(type, IF_NODOM)); return (if_alloc_domain(type, numa_domain)); } struct ifnet * if_alloc(u_char type) { return (if_alloc_domain(type, IF_NODOM)); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the last reference to an * interface is released. */ static void if_free_internal(struct ifnet *ifp) { KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); if (if_com_free[ifp->if_alloctype] != NULL) if_com_free[ifp->if_alloctype](ifp->if_l2com, ifp->if_alloctype); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); free(ifp->if_description, M_IFDESCR); free(ifp->if_hw_addr, M_IFADDR); if (ifp->if_numa_domain == IF_NODOM) free(ifp, M_IFNET); else free_domain(ifp, M_IFNET); } static void if_destroy(epoch_context_t ctx) { struct ifnet *ifp; ifp = __containerof(ctx, struct ifnet, if_epoch_ctx); if_free_internal(ifp); } /* * Deregister an interface and free the associated storage. */ void if_free(struct ifnet *ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex_locked(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy); CURVNET_RESTORE(); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { /* We don't assert the ifnet list lock here, but arguably should. */ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. 
If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to insure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. */ if (vmove && ifc != NULL) if_clone_addgroup(ifp, ifc); getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), ("transmit and qflush must both either be set or both be NULL")); if (ifp->if_transmit == NULL) { ifp->if_transmit = if_transmit; ifp->if_qflush = if_qflush; } if (ifp->if_input == NULL) ifp->if_input = if_input_default; if (ifp->if_requestencap == NULL) ifp->if_requestencap = if_requestencap_default; if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); #endif /* * Create a Link Level name for this device. */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possiable name so we * can do a rename in place later. 
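 * As a rough worked example on a 64-bit build with an Ethernet
 * interface (6-byte link-level address): offsetof(struct sockaddr_dl,
 * sdl_data) is 8, so masklen is 8 + IFNAMSIZ (16) = 24; 24 + 6 is
 * smaller than sizeof(struct sockaddr_dl), so socksize becomes that
 * structure size (54) rounded up to a multiple of sizeof(long), i.e.
 * 56, and ifasize is sizeof(struct ifaddr) plus room for two such
 * sockaddrs.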
*/ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; if (ifp->if_type == IFT_ETHER) { ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR, M_WAITOK | M_ZERO); } #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set */ if (ifp->if_hw_tsomax == 0 && ifp->if_hw_tsomaxsegcount == 0 && ifp->if_hw_tsomaxsegsize == 0) { /* * The TSO defaults needs to be such that an * NFS mbuf list of 35 mbufs totalling just * below 64K works and that a chain of mbufs * can be defragged into at most 32 segments: */ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); ifp->if_hw_tsomaxsegcount = 35; ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ /* XXX some drivers set IFCAP_TSO after ethernet attach */ if (ifp->if_capabilities & IFCAP_TSO) { if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } } #endif } #ifdef VIMAGE else { /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = CK_STAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } } #endif IFNET_WLOCK(); CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_epochalloc(void *dummy __unused) { net_epoch_preempt = epoch_alloc(EPOCH_PREEMPT); net_epoch = epoch_alloc(0); } SYSINIT(ifepochalloc, SI_SUB_TASKQ + 1, SI_ORDER_ANY, if_epochalloc, NULL); static void if_attachdomain(void *dummy) { struct ifnet *ifp; CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. 
*/ IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa; while (1) { struct epoch_tracker et; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) break; } NET_EPOCH_EXIT(et); if (ifa == NULL) break; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; IF_ADDR_WLOCK(ifp); while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) { ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs); CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); if_delmulti_locked(ifp, ifma, 1); } IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(struct ifnet *ifp) { CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0, NULL); CURVNET_RESTORE(); } /* * The vmove flag, if set, indicates that we are called from a callpath * that is moving an interface to a different vnet instance. * * The shutdown flag, if set, indicates that we are called in the * process of shutting down a vnet instance. Currently only the * vnet_if_return SYSUNINIT function sets it. Note: we can be called * on a vnet instance shutdown without this flag being set, e.g., when * the cloned interfaces are destoyed as first thing of teardown. */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; int i; struct domain *dp; struct ifnet *iter; int found = 0; #ifdef VIMAGE int shutdown; shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; #endif IFNET_WLOCK(); CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); if (!vmove) ifp->if_flags |= IFF_DYING; found = 1; break; } IFNET_WUNLOCK(); if (!found) { /* * While we would want to panic here, we cannot * guarantee that the interface is indeed still on * the list given we don't hold locks all the way. 
*/ return (ENOENT); #if 0 if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ #endif } /* * At this point we know the interface still was on the ifnet list * and we removed it so we are in a stable state. */ #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif epoch_wait_preempt(net_epoch_preempt); /* * Ensure all pending EPOCH(9) callbacks have been executed. This * fixes issues about late destruction of multicast options * which lead to leave group calls, which in turn access the * belonging ifnet structure: */ epoch_drain_callbacks(net_epoch_preempt); /* * In any case (destroy or vmove) detach us from the groups * and remove/wait for pending events on the taskq. * XXX-BZ in theory an interface could still enqueue a taskq change? */ if_delgroups(ifp); taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* * Check if this is a cloned interface or not. Must do even if * shutting down as a if_vmove_reclaim() would move the ifp and * the if_clone_addgroup() will have a corrupted string overwise * from a gibberish pointer. */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); if_down(ifp); #ifdef VIMAGE /* * On VNET shutdown abort here as the stack teardown will do all * the work top-down for us. */ if (shutdown) { /* Give interface users the chance to clean up. */ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same * abort as if we did not find the ifnet anymore. * if_detach() calls us in void context and does not care * about an early abort notification, so life is splendid :) */ goto finish_vnet_shutdown; } #endif /* * At this point we are not tearing down a VNET and are either * going to destroy or vmove the interface and have to cleanup * accordingly. */ /* * Remove routes and flush queues. */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { /* * Prevent further calls into the device driver via ifnet. */ if_dead(ifp); /* * Clean up all addresses. */ IF_ADDR_WLOCK(ifp); if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) { ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } else IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); #ifdef VIMAGE finish_vnet_shutdown: #endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the * theoretical race with re-attaching. 
*/ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); ifp->if_afdata[dp->dom_family] = NULL; } } return (0); } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in current * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. * An attempt is made to shrink if_index in current vnet, find an * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ static void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; u_int bif_dlt, bif_hdrlen; void *old; int rc; /* * if_detach_internal() will call the eventhandler to notify * interface departure. That will detach if_bpf. We need to * safe the dlt and hdrlen so we can re-attach it later. */ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. * If we cannot find it, we lost the race to someone else. */ rc = if_detach_internal(ifp, 1, &ifc); if (rc != 0) return; /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink * the if_index for that vnet if possible. * * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, * or we'd lock on one vnet and unlock on another. */ IFNET_WLOCK(); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); restart: IFNET_WLOCK(); ifp->if_index = ifindex_alloc(&old); if (__predict_false(ifp->if_index == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(ifp->if_index, ifp); IFNET_WUNLOCK(); if_attach_internal(ifp, 1, ifc); if (ifp->if_bpf == NULL) bpfattach(ifp, bif_dlt, bif_hdrlen); CURVNET_RESTORE(); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exists in the dst. prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); if (difp != NULL) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } CURVNET_RESTORE(); /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to the userland. 
*/ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; CK_STAILQ_INIT(&ifg->ifg_members); CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_member *ifgm; int freeifgl; IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } freeifgl = 0; IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); IF_ADDR_WUNLOCK(ifp); CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next); if 
(--ifgl->ifgl_group->ifg_refcnt == 0) { CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next); freeifgl = 1; } IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); if (freeifgl) { EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } free(ifgm, M_TEMP); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; struct ifg_member *ifgm; char groupname[IFNAMSIZ]; int ifglfree; IFNET_WLOCK(); while (!CK_STAILQ_EMPTY(&ifp->if_groups)) { ifgl = CK_STAILQ_FIRST(&ifp->if_groups); strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); IF_ADDR_WUNLOCK(ifp); CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next); ifglfree = 0; if (--ifgl->ifgl_group->ifg_refcnt == 0) { CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next); ifglfree = 1; } IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(ifgm, M_TEMP); if (ifglfree) { EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } EVENTHANDLER_INVOKE(group_change_event, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } static char * ifgr_group_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]); #endif return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]); } static struct ifg_req * ifgr_groups_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((struct ifg_req *)(uintptr_t) ifgrup->ifgr32.ifgr_ifgru.ifgru_groups); #endif return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups); } /* * Stores all groups from an interface in memory pointed to by ifgr. 
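 * Userland drives this through the SIOCGIFGROUP ioctl in two passes:
 * first with ifgr_len == 0 to learn the required size, then again with
 * a buffer.  A rough sketch of such a caller ("em0" and the socket s
 * are placeholders; ifconfig(8) is the canonical consumer):
 *
 *	struct ifgroupreq ifgr;
 *	struct ifg_req *groups;
 *
 *	memset(&ifgr, 0, sizeof(ifgr));
 *	strlcpy(ifgr.ifgr_name, "em0", sizeof(ifgr.ifgr_name));
 *	if (ioctl(s, SIOCGIFGROUP, &ifgr) == 0 && ifgr.ifgr_len > 0) {
 *		groups = malloc(ifgr.ifgr_len);
 *		ifgr.ifgr_groups = groups;
 *		ioctl(s, SIOCGIFGROUP, &ifgr);
 *	}
 *
 * The first call returns the required length in ifgr_len; the second
 * copies one struct ifg_req per group into the supplied buffer.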
*/ static int if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp) { struct epoch_tracker et; int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; if (ifgr->ifgr_len == 0) { NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); NET_EPOCH_EXIT(et); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); /* XXX: wire */ NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) { NET_EPOCH_EXIT(et); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { NET_EPOCH_EXIT(et); return (error); } len -= sizeof(ifgrq); ifgp++; } NET_EPOCH_EXIT(et); return (0); } /* * Stores all members of a group in memory pointed to by igfr */ static int if_getgroupmembers(struct ifgroupreq *ifgr) { struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, ifgr->ifgr_name)) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data. 
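 * This structure is what userland ultimately sees: for AF_LINK entries
 * returned by getifaddrs(3), ifa_data points at a struct if_data.  A
 * small sketch of such a consumer (illustration only):
 *
 *	struct ifaddrs *ifap, *ifa;
 *	struct if_data *ifd;
 *
 *	getifaddrs(&ifap);
 *	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
 *		if (ifa->ifa_addr == NULL ||
 *		    ifa->ifa_addr->sa_family != AF_LINK)
 *			continue;
 *		ifd = ifa->ifa_data;
 *		printf("%s: %ju in, %ju out\n", ifa->ifa_name,
 *		    (uintmax_t)ifd->ifi_ipackets,
 *		    (uintmax_t)ifd->ifi_opackets);
 *	}
 *	freeifaddrs(ifap);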
*/ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Wrapper functions for struct ifnet address list locking macros. These are * used by kernel modules to avoid encoding programming interface or binary * interface assumptions that may be violated when kernel-internal locking * approaches change. */ void if_addr_rlock(struct ifnet *ifp) { epoch_enter_preempt(net_epoch_preempt, curthread->td_et); } void if_addr_runlock(struct ifnet *ifp) { epoch_exit_preempt(net_epoch_preempt, curthread->td_et); } void if_maddr_rlock(if_t ifp) { epoch_enter_preempt(net_epoch_preempt, curthread->td_et); } void if_maddr_runlock(if_t ifp) { epoch_exit_preempt(net_epoch_preempt, curthread->td_et); } /* * Initialization, destruction and refcounting functions for ifaddrs. 
*/ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { refcount_acquire(&ifa->ifa_refcnt); } static void ifa_destroy(epoch_context_t ctx) { struct ifaddr *ifa; ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx); counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) epoch_call(net_epoch_preempt, &ifa->ifa_epoch_ctx, ifa_destroy); } static int ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, struct sockaddr *ia) { struct epoch_tracker et; int error; struct rt_addrinfo info; struct sockaddr_dl null_sdl; struct ifnet *ifp; ifp = ifa->ifa_ifp; bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; if (cmd == RTM_ADD) { /* explicitly specify (loopback) ifa */ if (info.rti_ifp != NULL) { NET_EPOCH_ENTER(et); info.rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp); if (info.rti_ifa != NULL) ifa_ref(info.rti_ifa); NET_EPOCH_EXIT(et); } } info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); if (error != 0 && !(cmd == RTM_ADD && error == EEXIST) && !(cmd == RTM_DELETE && error == ENOENT)) if_printf(ifp, "%s failed: %d\n", otype, error); return (error); } int ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia)); } int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia)); } int ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia)); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((const struct sockaddr_dl *)(a1))->sdl_len == \ ((const struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ CLLADDR((const struct sockaddr_dl *)(a2)), \ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. 
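 * Callers are expected to be inside the network epoch; the
 * ifa_ifwithaddr_check() wrapper below shows the usual pattern:
 *
 *	struct epoch_tracker et;
 *
 *	NET_EPOCH_ENTER(et);
 *	ifa = ifa_ifwithaddr(addr);
 *	if (ifa != NULL)
 *		ifa_ref(ifa);
 *	NET_EPOCH_EXIT(et);
 *
 * where the ifa_ref() is only needed if the ifaddr is used after the
 * epoch section ends.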
*/ /*ARGSUSED*/ struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { struct ifnet *ifp; struct ifaddr *ifa; MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { struct epoch_tracker et; int rc; NET_EPOCH_ENTER(et); rc = (ifa_ifwithaddr(addr) != NULL); NET_EPOCH_EXIT(et); return (rc); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; MPASS(in_epoch(net_epoch_preempt)); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. */ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } else { /* * Scan all the bits in the ifa's address. 
* If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { ifa_maybe = ifa; } } } } ifa = ifa_maybe; ifa_maybe = NULL; done: return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. 
*/ static void if_route(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_pcp_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp = (struct ifnet *)arg; int link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); /* Notify that the link state has changed. */ rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) ifp->if_bridge_linkstate(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) if_printf(ifp, "link state changed to %s\n", (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); } /* * Flush an interface queue. */ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. 
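 *
 * Illustrative sketch of a hypothetical caller ("em0" is an assumption):
 * ifunit_ref() hands back the ifnet with a reference held, which must be
 * dropped with if_rele() when the caller is done; ifunit() returns an
 * unreferenced pointer and relies on the caller to otherwise keep the
 * interface from going away.
 *
 *	struct ifnet *ifp;
 *
 *	ifp = ifunit_ref("em0");
 *	if (ifp != NULL) {
 *		... use ifp ...
 *		if_rele(ifp);
 *	}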
*/ struct ifnet * ifunit_ref(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) if_ref(ifp); NET_EPOCH_EXIT(et); return (ifp); } struct ifnet * ifunit(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } NET_EPOCH_EXIT(et); return (ifp); } static void * ifr_buffer_get_buffer(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); } static void ifr_buffer_set_buffer_null(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; } static size_t ifr_buffer_get_length(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.length); } static void ifr_buffer_set_length(void *data, size_t len) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.length = len; } void * ifr_data_get_ptr(void *ifrp) { union ifreq_union *ifrup; ifrup = ifrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_data); #endif return (ifrup->ifr.ifr_ifru.ifru_data); } /* * Hardware specific interface ioctls. */ int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; int error = 0, do_ifup = 0; int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: temp_flags = ifp->if_flags | ifp->if_drv_flags; ifr->ifr_flags = temp_flags & 0xffff; ifr->ifr_flagshigh = temp_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr_buffer_get_length(ifr) < descrlen) ifr_buffer_set_buffer_null(ifr); else error = copyout(ifp->if_description, ifr_buffer_get_buffer(ifr), descrlen); ifr_buffer_set_length(ifr, descrlen); } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. 
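 *
 * Illustrative userland sketch (interface name and text are assumptions,
 * s is any datagram socket): the description travels in ifr_buffer and
 * the length counts the terminating nul:
 *
 *	struct ifreq ifr = { 0 };
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ifr.ifr_buffer.buffer = "uplink to core switch";
 *	ifr.ifr_buffer.length = strlen("uplink to core switch") + 1;
 *	ioctl(s, SIOCSIFDESCR, &ifr);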
*/ if (ifr_buffer_get_length(ifr) > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr_buffer_get_length(ifr) == 0) descrbuf = NULL; else { descrbuf = malloc(ifr_buffer_get_length(ifr), M_IFDESCR, M_WAITOK | M_ZERO); error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, ifr_buffer_get_length(ifr) - 1); if (error) { free(descrbuf, M_IFDESCR); break; } } sx_xlock(&ifdescr_sx); odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); getmicrotime(&ifp->if_lastchange); free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Currently, no driver owned flags pass the IFF_CANTCHANGE * check, so we don't need special handling here yet. */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { if_down(ifp); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { do_ifup = 1; } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; if (log_promisc_mode_change) if_printf(ifp, "permanently promiscuous mode %s\n", ((new_flags & IFF_PPROMISC) ? "enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (ifp->if_ioctl) { (void) (*ifp->if_ioctl)(ifp, cmd, data); } if (do_ifup) if_up(ifp); getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (new_name[IFNAMSIZ-1] != '\0') { new_name[IFNAMSIZ-1] = '\0'; if (strlen(new_name) == IFNAMSIZ-1) return (EINVAL); } if (strcmp(new_name, ifp->if_xname) == 0) break; if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; /* Announce the departure of the interface. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if_printf(ifp, "changing name to '%s'\n", new_name); IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). 
*/ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); ifp->if_flags &= ~IFF_RENAMING; break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); #ifdef INET NETDUMP_REINIT(ifp); #endif } /* * If the link MTU changed, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; } case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct epoch_tracker et; struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. 
*/ NET_EPOCH_ENTER(et); ifma = if_findmulti(ifp, &ifr->ifr_addr); NET_EPOCH_EXIT(et); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: case SIOCGIFGENERIC: case SIOCGIFRSSKEY: case SIOCGIFRSSHASH: + case SIOCGIFDOWNREASON: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; case SIOCGHWADDR: error = if_gethwaddr(ifp, ifr); break; case CASE_IOC_IFGROUPREQ(SIOCAIFGROUP): error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); if ((error = if_addgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; case CASE_IOC_IFGROUPREQ(SIOCGIFGROUP): if ((error = if_getgroup((struct ifgroupreq *)data, ifp))) return (error); break; case CASE_IOC_IFGROUPREQ(SIOCDIFGROUP): error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); if ((error = if_delgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; default: error = ENOIOCTL; break; } return (error); } #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) #endif #ifdef COMPAT_FREEBSD32 static void ifmr_init(struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; memcpy(ifmr->ifm_name, ifmr32->ifm_name, sizeof(ifmr->ifm_name)); ifmr->ifm_current = ifmr32->ifm_current; ifmr->ifm_mask = ifmr32->ifm_mask; ifmr->ifm_status = ifmr32->ifm_status; ifmr->ifm_active = ifmr32->ifm_active; ifmr->ifm_count = ifmr32->ifm_count; ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist; } static void ifmr_update(const struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; ifmr32->ifm_current = ifmr->ifm_current; ifmr32->ifm_mask = ifmr->ifm_mask; ifmr32->ifm_status = ifmr->ifm_status; ifmr32->ifm_active = ifmr->ifm_active; ifmr32->ifm_count = ifmr->ifm_count; } #endif /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { #ifdef COMPAT_FREEBSD32 caddr_t saved_data = NULL; struct ifmediareq ifmr; struct ifmediareq *ifmrp; #endif struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; #ifdef VIMAGE int shutdown; #endif CURVNET_SET(so->so_vnet); #ifdef VIMAGE /* Make sure the VNET is stable. */ shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET && so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 
1 : 0; if (shutdown) { CURVNET_RESTORE(); return (EBUSY); } #endif switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); CURVNET_RESTORE(); return (error); #ifdef COMPAT_FREEBSD32 case SIOCGIFCONF32: { struct ifconf32 *ifc32; struct ifconf ifc; ifc32 = (struct ifconf32 *)data; ifc.ifc_len = ifc32->ifc_len; ifc.ifc_buf = PTRIN(ifc32->ifc_buf); error = ifconf(SIOCGIFCONF, (void *)&ifc); CURVNET_RESTORE(); if (error == 0) ifc32->ifc_len = ifc.ifc_len; return (error); } #endif } #ifdef COMPAT_FREEBSD32 ifmrp = NULL; switch (cmd) { case SIOCGIFMEDIA32: case SIOCGIFXMEDIA32: ifmrp = &ifmr; ifmr_init(ifmrp, data); cmd = _IOC_NEWTYPE(cmd, struct ifmediareq); saved_data = data; data = (caddr_t)ifmrp; } #endif ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); goto out_noref; #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? ifr_data_get_ptr(ifr) : NULL); goto out_noref; case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) error = if_clone_destroy(ifr->ifr_name); goto out_noref; case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); goto out_noref; case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB): error = if_getgroupmembers((struct ifgroupreq *)data); goto out_noref; #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); goto out_noref; #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { error = ENXIO; goto out_noref; } error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) goto out_ref; oif_flags = ifp->if_flags; if (so->so_proto == NULL) { error = EOPNOTSUPP; goto out_ref; } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. * * Make an exception for the legacy SIOCSIF* requests. Drivers * trust SIOCSIFADDR et al to come from an already privileged * layer, and do not perform any credentials checks or input * validation. */ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } out_ref: if_rele(ifp); out_noref: #ifdef COMPAT_FREEBSD32 if (ifmrp != NULL) { KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA), ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx", cmd)); data = saved_data; ifmr_update(ifmrp, data); } #endif CURVNET_RESTORE(); return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). * The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. 
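 *
 * Illustrative sketch of a hypothetical consumer (the pattern used by
 * packet-capture code): promiscuous mode is switched through matched,
 * reference-counted calls rather than by writing if_flags directly, so
 * that only the first "on" and the last "off" reach the driver:
 *
 *	error = ifpromisc(ifp, 1);
 *	... capture traffic ...
 *	error = ifpromisc(ifp, 0);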
*/ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) if_printf(ifp, "promiscuous mode %s\n", (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { struct epoch_tracker et; int addrs; /* * Zero the ifr to make sure we don't disclose the contents * of the stack. 
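 *
 * Illustrative userland sketch (buffer size is an assumption, s is any
 * datagram socket): SIOCGIFCONF copies out packed struct ifreq records
 * and reports the number of valid bytes back through ifc_len; a record
 * grows past sizeof(struct ifreq) when an address is longer than a
 * struct sockaddr:
 *
 *	char buf[8192];
 *	struct ifconf ifc = { .ifc_len = sizeof(buf), .ifc_buf = buf };
 *
 *	ioctl(s, SIOCGIFCONF, &ifc);
 *	... walk the first ifc.ifc_len bytes of buf ...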
*/ memset(&ifr, 0, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { if (sa->sa_len < sizeof(*sa)) { memset(&ifr.ifr_ifru.ifru_addr, 0, sizeof(ifr.ifr_ifru.ifru_addr)); memcpy(&ifr.ifr_ifru.ifru_addr, sa, sa->sa_len); } else ifr.ifr_ifru.ifru_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } NET_EPOCH_EXIT(et); if (addrs == 0) { sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. 
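 *
 * Illustrative sketch of the release pattern (mirroring what
 * if_delmulti_locked() does below; locking elided): the last reference
 * unlinks the entry and only then hands it to if_freemulti():
 *
 *	if (--ifma->ifma_refcount == 0) {
 *		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr,
 *		    ifma_link);
 *		if_freemulti(ifma);
 *	}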
*/ #ifdef MCAST_VERBOSE extern void kdb_backtrace(void); #endif static void if_freemulti_internal(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); #ifdef MCAST_VERBOSE kdb_backtrace(); printf("%s freeing ifma: %p\n", __func__, ifma); #endif free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } static void if_destroymulti(epoch_context_t ctx) { struct ifmultiaddr *ifma; ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx); if_freemulti_internal(ifma); } void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d", ifma->ifma_refcount)); epoch_call(net_epoch_preempt, &ifma->ifma_epoch_ctx, if_destroymulti); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif #ifdef INET6 IN6_MULTI_LIST_UNLOCK_ASSERT(); #endif /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { /* Provide called function with buffer size information */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; } /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. 
*/ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } ll_ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. */ if (ifp->if_ioctl != NULL) { (void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); } if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; #ifdef INVARIANTS struct epoch_tracker et; struct ifnet *oifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) ifp = NULL; NET_EPOCH_EXIT(et); KASSERT(ifp != NULL, ("%s: ifnet went away", __func__)); #endif if (ifp == NULL) return (ENOENT); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } void if_delmulti_ifma(struct ifmultiaddr *ifma) { if_delmulti_ifma_flags(ifma, 0); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags) { struct ifnet *ifp; int lastref; MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma); #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct epoch_tracker et; struct ifnet *oifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) ifp = NULL; NET_EPOCH_EXIT(et); } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. 
*/ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, flags); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : ""); /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. */ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } } if_freemulti(ll_ifma); } } #ifdef INVARIANTS if (ifp) { struct ifmultiaddr *ifmatmp; CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link) MPASS(ifma != ifmatmp); } #endif if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. 
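 *
 * Illustrative sketch of a hypothetical caller (Ethernet assumed, so the
 * length must be ETHER_ADDR_LEN from net/ethernet.h; the address shown is
 * a made-up locally administered MAC):
 *
 *	u_char lla[ETHER_ADDR_LEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	error = if_setlladdr(ifp, lla, ETHER_ADDR_LEN);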
* * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; struct epoch_tracker et; int rc; rc = 0; NET_EPOCH_ENTER(et); ifa = ifp->if_addr; if (ifa == NULL) { rc = EINVAL; goto out; } sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) { rc = EINVAL; goto out; } if (len != sdl->sdl_alen) { /* don't allow length to change */ rc = EINVAL; goto out; } switch (ifp->if_type) { case IFT_ETHER: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_IEEE8023ADLAG: bcopy(lladdr, LLADDR(sdl), len); break; default: rc = ENODEV; goto out; } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ NET_EPOCH_EXIT(et); if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); out: NET_EPOCH_EXIT(et); return (rc); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Tunnel interfaces can nest, also they may cause infinite recursion * calls when misconfigured. We'll prevent this by detecting loops. * High nesting level may cause stack exhaustion. We'll prevent this * by introducing upper limit. * * Return 0, if tunnel nesting count is equal or less than limit. */ int if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, int limit) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > limit) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } /* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. 
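 *
 * Illustrative userland sketch (interface name is an assumption, s is any
 * datagram socket): SIOCGHWADDR reports the address read at attach, even
 * if a different one was set later with SIOCSIFLLADDR:
 *
 *	struct ifreq ifr = { 0 };
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(s, SIOCGHWADDR, &ifr);
 *	... hardware address is in ifr.ifr_addr.sa_data ...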
*/ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } int if_printf(struct ifnet *ifp, const char *fmt, ...) { char if_fmt[256]; va_list ap; snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt); va_start(ap, fmt); vlog(LOG_INFO, if_fmt, ap); va_end(ap); return (0); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= ~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int 
if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. Should be used with * the understanding that it violates the interface boundaries, and should be * a last resort only. */ int if_setdev(if_t ifp, void *dev) { return (0); } int if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) { ((struct ifnet *)ifp)->if_drv_flags |= set_flags; ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; return (0); } int if_getdrvflags(if_t ifp) { return ((struct ifnet *)ifp)->if_drv_flags; } int if_setdrvflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_drv_flags = flags; return (0); } int if_setflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_flags = flags; return (0); } int if_setflagbits(if_t ifp, int set, int clear) { ((struct ifnet *)ifp)->if_flags |= set; ((struct ifnet *)ifp)->if_flags &= ~clear; return (0); } int if_getflags(if_t ifp) { return ((struct ifnet *)ifp)->if_flags; } int if_clearhwassist(if_t ifp) { ((struct ifnet *)ifp)->if_hwassist = 0; return (0); } int if_sethwassistbits(if_t ifp, int toset, int toclear) { ((struct ifnet *)ifp)->if_hwassist |= toset; ((struct ifnet *)ifp)->if_hwassist &= ~toclear; return (0); } int if_sethwassist(if_t ifp, int hwassist_bit) { ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; return (0); } int if_gethwassist(if_t ifp) { return ((struct ifnet *)ifp)->if_hwassist; } int if_setmtu(if_t ifp, int mtu) { ((struct ifnet *)ifp)->if_mtu = mtu; return (0); } int if_getmtu(if_t ifp) { return ((struct ifnet *)ifp)->if_mtu; } int if_getmtu_family(if_t ifp, int family) { struct domain *dp; for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu((struct ifnet *)ifp)); } return (((struct ifnet *)ifp)->if_mtu); } int if_setsoftc(if_t ifp, void *softc) { ((struct ifnet *)ifp)->if_softc = softc; return (0); } void * if_getsoftc(if_t ifp) { return ((struct ifnet *)ifp)->if_softc; } void if_setrcvif(struct mbuf *m, if_t ifp) { MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); } struct ifaddr * if_getifaddr(if_t ifp) { return ((struct ifnet *)ifp)->if_addr; } int if_getamcount(if_t ifp) { return ((struct ifnet *)ifp)->if_amcount; } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } int if_vlantrunkinuse(if_t ifp) { return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; } int if_input(if_t ifp, struct mbuf* sendmp) { (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); return (0); } /* XXX */ #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max) { struct ifmultiaddr *ifma; uint8_t *lmta = (uint8_t *)mta; int mcnt = 0; CK_STAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt 
== max) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } *cnt = mcnt; return (0); } int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max) { int error; if_maddr_rlock(ifp); error = if_setupmultiaddr(ifp, mta, cnt, max); if_maddr_runlock(ifp); return (error); } int if_multiaddr_count(if_t ifp, int max) { struct ifmultiaddr *ifma; int count; count = 0; if_maddr_rlock(ifp); CK_STAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count++; if (count == max) break; } if_maddr_runlock(ifp); return (count); } int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg) { struct ifmultiaddr *ifma; int cnt = 0; if_maddr_rlock(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) cnt += filter(arg, ifma, cnt); if_maddr_runlock(ifp); return (cnt); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ((struct ifnet *)ifp)->if_hdrlen = len; return (0); } caddr_t if_getlladdr(if_t ifp) { return (IF_LLADDR((struct ifnet *)ifp)); } void * if_gethandle(u_char type) { return (if_alloc(type)); } void if_bpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; BPF_MTAP(ifp, m); } void if_etherbpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; ETHER_BPF_MTAP(ifp, m); } void if_vlancap(if_t ifh) { struct ifnet *ifp = (struct ifnet *)ifh; VLAN_CAPABILITIES(ifp); } int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) { ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax; return (0); } int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) { ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount; return (0); } int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) { ((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; return (0); } u_int if_gethwtsomax(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomax); } u_int if_gethwtsomaxsegcount(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount); } u_int if_gethwtsomaxsegsize(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize); } void if_setinitfn(if_t ifp, void (*init_fn)(void *)) { ((struct ifnet *)ifp)->if_init = init_fn; } void if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) { ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; } void if_setstartfn(if_t ifp, void (*start_fn)(if_t)) { ((struct ifnet *)ifp)->if_start = (void *)start_fn; } void if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) { ((struct ifnet *)ifp)->if_transmit = start_fn; } void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) { ((struct ifnet *)ifp)->if_qflush = flush_fn; } void if_setgetcounterfn(if_t ifp, if_get_counter_t fn) { ifp->if_get_counter = fn; } /* Revisit these - These are inline functions originally. 
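 *
 * Illustrative sketch of a hypothetical multiqueue transmit path (txq and
 * its buf_ring are assumptions): a driver built against these wrappers
 * enqueues packets and drains the ring toward the hardware:
 *
 *	error = drbr_enqueue_drv(ifp, txq->br, m);
 *	while ((m = drbr_dequeue_drv(ifp, txq->br)) != NULL)
 *		... program the descriptor ring with m ...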
*/ int drbr_inuse_drv(if_t ifh, struct buf_ring *br) { return drbr_inuse(ifh, br); } struct mbuf* drbr_dequeue_drv(if_t ifh, struct buf_ring *br) { return drbr_dequeue(ifh, br); } int drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br) { return drbr_needs_enqueue(ifh, br); } int drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m) { return drbr_enqueue(ifh, br, m); } Index: projects/clang900-import/sys/net/if.h =================================================================== --- projects/clang900-import/sys/net/if.h (revision 352536) +++ projects/clang900-import/sys/net/if.h (revision 352537) @@ -1,610 +1,620 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include #if __BSD_VISIBLE /* * does not depend on on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) * The same holds for . (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include #include #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities. 
*/ struct if_data { /* generic interface information */ uint8_t ifi_type; /* ethernet, tokenring, etc */ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ uint8_t ifi_addrlen; /* media address length */ uint8_t ifi_hdrlen; /* media header length */ uint8_t ifi_link_state; /* current link state */ uint8_t ifi_vhid; /* carp vhid */ uint16_t ifi_datalen; /* length of this data struct */ uint32_t ifi_mtu; /* maximum transmission unit */ uint32_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input */ uint64_t ifi_oqdrops; /* dropped on output */ uint64_t ifi_noproto; /* destined for unsupported protocol */ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ /* Unions are here to make sizes MI. */ union { /* uptime at attach or stat reset */ time_t tt; uint64_t ph; } __ifi_epoch; #define ifi_epoch __ifi_epoch.tt union { /* time of last administrative change */ struct timeval tv; struct { uint64_t ph1; uint64_t ph2; } ph; } __ifi_lastchange; #define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. 
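 *
 * Illustrative sketch (hypothetical driver init and stop paths): a driver
 * confines itself to the driver-owned half and leaves if_flags to the
 * stack:
 *
 *	ifp->if_drv_flags |= IFF_DRV_RUNNING;	(init: resources allocated)
 *	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 *	...
 *	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;	(stop)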
*/ #define IFF_UP 0x1 /* (n) interface is up */ #define IFF_BROADCAST 0x2 /* (i) broadcast address valid */ #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ /* 0x20 was IFF_SMART */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ #define IFF_ALLMULTI 0x200 /* (n) receive all multicast packets */ #define IFF_DRV_OACTIVE 0x400 /* (d) tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* (i) can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ #define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ #define IFF_NOGROUP 0x800000 /* (n) interface is not part of any groups */ /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. */ #ifndef _KERNEL #define IFF_RUNNING IFF_DRV_RUNNING #define IFF_OACTIVE IFF_DRV_OACTIVE #endif /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ /* * Capabilities that interfaces can advertise. * * struct ifnet.if_capabilities * contains the optional features & capabilities a particular interface * supports (not only the driver but also the detected hw revision). * Capabilities are defined by IFCAP_* below. * struct ifnet.if_capenable * contains the enabled (either by default or through ifconfig) optional * features & capabilities on this interface. * Capabilities are defined by IFCAP_* below. * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above * contains the enabled optional feature & capabilites that can be used * individually per packet and are specified in the mbuf pkthdr.csum_flags * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differenciated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h * * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. * This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. 
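 *
 * Illustrative sketch (hypothetical Ethernet driver at attach time; the
 * flag choice is an assumption): advertise what the hardware can do,
 * enable a default subset, and keep if_hwassist in step with the enabled
 * checksum capabilities:
 *
 *	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU;
 *	ifp->if_capenable = ifp->if_capabilities;
 *	ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;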
*/ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ /* available 0x20000 */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ #define IFCAP_NOMAP 0x4000000 /* can TX unmapped mbufs */ #define IFCAP_TXTLS4 0x8000000 /* can do TLS encryption and segmentation for TCP */ #define IFCAP_TXTLS6 0x10000000 /* can do TLS encryption and segmentation for TCP6 */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct if_msghdrl given a pointer to struct if_msghdrl. 
*/ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ int _ifm_spare2; struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl. */ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ u_short _ifmam_spare1; }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #ifndef _KERNEL #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #endif #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #ifndef _KERNEL #define ifr_data ifr_ifru.ifru_data /* for use by interface */ #endif #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ #define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. 
* Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? */ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #ifndef _KERNEL #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups #endif }; /* * Structure used to request i2c data * from interface transceivers. */ struct ifi2creq { uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ uint8_t offset; /* read offset */ uint8_t len; /* read length */ uint8_t spare0; uint32_t spare1; uint8_t data[8]; /* read buffer */ }; /* * RSS hash. */ #define RSS_FUNC_NONE 0 /* RSS disabled */ #define RSS_FUNC_PRIVATE 1 /* non-standard */ #define RSS_FUNC_TOEPLITZ 2 #define RSS_TYPE_IPV4 0x00000001 #define RSS_TYPE_TCP_IPV4 0x00000002 #define RSS_TYPE_IPV6 0x00000004 #define RSS_TYPE_IPV6_EX 0x00000008 #define RSS_TYPE_TCP_IPV6 0x00000010 #define RSS_TYPE_TCP_IPV6_EX 0x00000020 #define RSS_TYPE_UDP_IPV4 0x00000040 #define RSS_TYPE_UDP_IPV6 0x00000080 #define RSS_TYPE_UDP_IPV6_EX 0x00000100 #define RSS_KEYLEN 128 struct ifrsskey { char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrk_func; /* RSS_FUNC_ */ uint8_t ifrk_spare0; uint16_t ifrk_keylen; uint8_t ifrk_key[RSS_KEYLEN]; }; struct ifrsshash { char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrh_func; /* RSS_FUNC_ */ uint8_t ifrh_spare0; uint16_t ifrh_spare1; uint32_t ifrh_types; /* RSS_TYPE_ */ }; #define IFNET_PCP_NONE 0xff /* PCP disabled */ +#define IFDR_MSG_SIZE 64 +#define IFDR_REASON_MSG 1 +#define IFDR_REASON_VENDOR 2 +struct ifdownreason { + char ifdr_name[IFNAMSIZ]; + uint32_t ifdr_reason; + uint32_t ifdr_vendor; + char ifdr_msg[IFDR_MSG_SIZE]; +}; + #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */ Index: projects/clang900-import/sys/netinet/sctp_auth.c =================================================================== --- projects/clang900-import/sys/netinet/sctp_auth.c (revision 352536) +++ projects/clang900-import/sys/netinet/sctp_auth.c (revision 352537) @@ -1,2011 +1,2011 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef SCTP_DEBUG #define SCTP_AUTH_DEBUG (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_AUTH1) #define SCTP_AUTH_DEBUG2 (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_AUTH2) #endif /* SCTP_DEBUG */ void sctp_clear_chunklist(sctp_auth_chklist_t *chklist) { memset(chklist, 0, sizeof(*chklist)); /* chklist->num_chunks = 0; */ } sctp_auth_chklist_t * sctp_alloc_chunklist(void) { sctp_auth_chklist_t *chklist; SCTP_MALLOC(chklist, sctp_auth_chklist_t *, sizeof(*chklist), SCTP_M_AUTH_CL); if (chklist == NULL) { SCTPDBG(SCTP_DEBUG_AUTH1, "sctp_alloc_chunklist: failed to get memory!\n"); } else { sctp_clear_chunklist(chklist); } return (chklist); } void sctp_free_chunklist(sctp_auth_chklist_t *list) { if (list != NULL) SCTP_FREE(list, SCTP_M_AUTH_CL); } sctp_auth_chklist_t * sctp_copy_chunklist(sctp_auth_chklist_t *list) { sctp_auth_chklist_t *new_list; if (list == NULL) return (NULL); /* get a new list */ new_list = sctp_alloc_chunklist(); if (new_list == NULL) return (NULL); /* copy it */ memcpy(new_list, list, sizeof(*new_list)); return (new_list); } /* * add a chunk to the required chunks list */ int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t *list) { if (list == NULL) return (-1); /* is chunk restricted? 
*/ if ((chunk == SCTP_INITIATION) || (chunk == SCTP_INITIATION_ACK) || (chunk == SCTP_SHUTDOWN_COMPLETE) || (chunk == SCTP_AUTHENTICATION)) { return (-1); } if (list->chunks[chunk] == 0) { list->chunks[chunk] = 1; list->num_chunks++; SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: added chunk %u (0x%02x) to Auth list\n", chunk, chunk); } return (0); } /* * delete a chunk from the required chunks list */ int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t *list) { if (list == NULL) return (-1); if (list->chunks[chunk] == 1) { list->chunks[chunk] = 0; list->num_chunks--; SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: deleted chunk %u (0x%02x) from Auth list\n", chunk, chunk); } return (0); } size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t *list) { if (list == NULL) return (0); else return (list->num_chunks); } /* * return the current number and list of required chunks caller must * guarantee ptr has space for up to 256 bytes */ int sctp_serialize_auth_chunks(const sctp_auth_chklist_t *list, uint8_t *ptr) { int i, count = 0; if (list == NULL) return (0); for (i = 0; i < 256; i++) { if (list->chunks[i] != 0) { *ptr++ = i; count++; } } return (count); } int sctp_pack_auth_chunks(const sctp_auth_chklist_t *list, uint8_t *ptr) { int i, size = 0; if (list == NULL) return (0); if (list->num_chunks <= 32) { /* just list them, one byte each */ for (i = 0; i < 256; i++) { if (list->chunks[i] != 0) { *ptr++ = i; size++; } } } else { int index, offset; /* pack into a 32 byte bitfield */ for (i = 0; i < 256; i++) { if (list->chunks[i] != 0) { index = i / 8; offset = i % 8; ptr[index] |= (1 << offset); } } size = 32; } return (size); } int sctp_unpack_auth_chunks(const uint8_t *ptr, uint8_t num_chunks, sctp_auth_chklist_t *list) { int i; int size; if (list == NULL) return (0); if (num_chunks <= 32) { /* just pull them, one byte each */ for (i = 0; i < num_chunks; i++) { (void)sctp_auth_add_chunk(*ptr++, list); } size = num_chunks; } else { int index, offset; /* unpack from a 32 byte bitfield */ for (index = 0; index < 32; index++) { for (offset = 0; offset < 8; offset++) { if (ptr[index] & (1 << offset)) { (void)sctp_auth_add_chunk((index * 8) + offset, list); } } } size = 32; } return (size); } /* * allocate structure space for a key of length keylen */ sctp_key_t * sctp_alloc_key(uint32_t keylen) { sctp_key_t *new_key; SCTP_MALLOC(new_key, sctp_key_t *, sizeof(*new_key) + keylen, SCTP_M_AUTH_KY); if (new_key == NULL) { /* out of memory */ return (NULL); } new_key->keylen = keylen; return (new_key); } void sctp_free_key(sctp_key_t *key) { if (key != NULL) SCTP_FREE(key, SCTP_M_AUTH_KY); } void sctp_print_key(sctp_key_t *key, const char *str) { uint32_t i; if (key == NULL) { SCTP_PRINTF("%s: [Null key]\n", str); return; } SCTP_PRINTF("%s: len %u, ", str, key->keylen); if (key->keylen) { for (i = 0; i < key->keylen; i++) SCTP_PRINTF("%02x", key->key[i]); SCTP_PRINTF("\n"); } else { SCTP_PRINTF("[Null key]\n"); } } void sctp_show_key(sctp_key_t *key, const char *str) { uint32_t i; if (key == NULL) { SCTP_PRINTF("%s: [Null key]\n", str); return; } SCTP_PRINTF("%s: len %u, ", str, key->keylen); if (key->keylen) { for (i = 0; i < key->keylen; i++) SCTP_PRINTF("%02x", key->key[i]); SCTP_PRINTF("\n"); } else { SCTP_PRINTF("[Null key]\n"); } } static uint32_t sctp_get_keylen(sctp_key_t *key) { if (key != NULL) return (key->keylen); else return (0); } /* * generate a new random key of length 'keylen' */ sctp_key_t * sctp_generate_random_key(uint32_t keylen) { sctp_key_t *new_key; new_key = sctp_alloc_key(keylen); if 
(new_key == NULL) { /* out of memory */ return (NULL); } SCTP_READ_RANDOM(new_key->key, keylen); new_key->keylen = keylen; return (new_key); } sctp_key_t * sctp_set_key(uint8_t *key, uint32_t keylen) { sctp_key_t *new_key; new_key = sctp_alloc_key(keylen); if (new_key == NULL) { /* out of memory */ return (NULL); } memcpy(new_key->key, key, keylen); return (new_key); } /*- * given two keys of variable size, compute which key is "larger/smaller" * returns: 1 if key1 > key2 * -1 if key1 < key2 * 0 if key1 = key2 */ static int sctp_compare_key(sctp_key_t *key1, sctp_key_t *key2) { uint32_t maxlen; uint32_t i; uint32_t key1len, key2len; uint8_t *key_1, *key_2; uint8_t val1, val2; /* sanity/length check */ key1len = sctp_get_keylen(key1); key2len = sctp_get_keylen(key2); if ((key1len == 0) && (key2len == 0)) return (0); else if (key1len == 0) return (-1); else if (key2len == 0) return (1); if (key1len < key2len) { maxlen = key2len; } else { maxlen = key1len; } key_1 = key1->key; key_2 = key2->key; /* check for numeric equality */ for (i = 0; i < maxlen; i++) { /* left-pad with zeros */ val1 = (i < (maxlen - key1len)) ? 0 : *(key_1++); val2 = (i < (maxlen - key2len)) ? 0 : *(key_2++); if (val1 > val2) { return (1); } else if (val1 < val2) { return (-1); } } /* keys are equal value, so check lengths */ if (key1len == key2len) return (0); else if (key1len < key2len) return (-1); else return (1); } /* * generate the concatenated keying material based on the two keys and the * shared key (if available). draft-ietf-tsvwg-auth specifies the specific * order for concatenation */ sctp_key_t * sctp_compute_hashkey(sctp_key_t *key1, sctp_key_t *key2, sctp_key_t *shared) { uint32_t keylen; sctp_key_t *new_key; uint8_t *key_ptr; keylen = sctp_get_keylen(key1) + sctp_get_keylen(key2) + sctp_get_keylen(shared); if (keylen > 0) { /* get space for the new key */ new_key = sctp_alloc_key(keylen); if (new_key == NULL) { /* out of memory */ return (NULL); } new_key->keylen = keylen; key_ptr = new_key->key; } else { /* all keys empty/null?! 
*/ return (NULL); } /* concatenate the keys */ if (sctp_compare_key(key1, key2) <= 0) { /* key is shared + key1 + key2 */ if (sctp_get_keylen(shared)) { memcpy(key_ptr, shared->key, shared->keylen); key_ptr += shared->keylen; } if (sctp_get_keylen(key1)) { memcpy(key_ptr, key1->key, key1->keylen); key_ptr += key1->keylen; } if (sctp_get_keylen(key2)) { memcpy(key_ptr, key2->key, key2->keylen); } } else { /* key is shared + key2 + key1 */ if (sctp_get_keylen(shared)) { memcpy(key_ptr, shared->key, shared->keylen); key_ptr += shared->keylen; } if (sctp_get_keylen(key2)) { memcpy(key_ptr, key2->key, key2->keylen); key_ptr += key2->keylen; } if (sctp_get_keylen(key1)) { memcpy(key_ptr, key1->key, key1->keylen); } } return (new_key); } sctp_sharedkey_t * sctp_alloc_sharedkey(void) { sctp_sharedkey_t *new_key; SCTP_MALLOC(new_key, sctp_sharedkey_t *, sizeof(*new_key), SCTP_M_AUTH_KY); if (new_key == NULL) { /* out of memory */ return (NULL); } new_key->keyid = 0; new_key->key = NULL; new_key->refcount = 1; new_key->deactivated = 0; return (new_key); } void sctp_free_sharedkey(sctp_sharedkey_t *skey) { if (skey == NULL) return; if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&skey->refcount)) { if (skey->key != NULL) sctp_free_key(skey->key); SCTP_FREE(skey, SCTP_M_AUTH_KY); } } sctp_sharedkey_t * sctp_find_sharedkey(struct sctp_keyhead *shared_keys, uint16_t key_id) { sctp_sharedkey_t *skey; LIST_FOREACH(skey, shared_keys, next) { if (skey->keyid == key_id) return (skey); } return (NULL); } int sctp_insert_sharedkey(struct sctp_keyhead *shared_keys, sctp_sharedkey_t *new_skey) { sctp_sharedkey_t *skey; if ((shared_keys == NULL) || (new_skey == NULL)) return (EINVAL); /* insert into an empty list? */ if (LIST_EMPTY(shared_keys)) { LIST_INSERT_HEAD(shared_keys, new_skey, next); return (0); } /* insert into the existing list, ordered by key id */ LIST_FOREACH(skey, shared_keys, next) { if (new_skey->keyid < skey->keyid) { /* insert it before here */ LIST_INSERT_BEFORE(skey, new_skey, next); return (0); } else if (new_skey->keyid == skey->keyid) { /* replace the existing key */ /* verify this key *can* be replaced */ - if ((skey->deactivated) && (skey->refcount > 1)) { + if ((skey->deactivated) || (skey->refcount > 1)) { SCTPDBG(SCTP_DEBUG_AUTH1, "can't replace shared key id %u\n", new_skey->keyid); return (EBUSY); } SCTPDBG(SCTP_DEBUG_AUTH1, "replacing shared key id %u\n", new_skey->keyid); LIST_INSERT_BEFORE(skey, new_skey, next); LIST_REMOVE(skey, next); sctp_free_sharedkey(skey); return (0); } if (LIST_NEXT(skey, next) == NULL) { /* belongs at the end of the list */ LIST_INSERT_AFTER(skey, new_skey, next); return (0); } } /* shouldn't reach here */ return (EINVAL); } void sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t key_id) { sctp_sharedkey_t *skey; /* find the shared key */ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, key_id); /* bump the ref count */ if (skey) { atomic_add_int(&skey->refcount, 1); SCTPDBG(SCTP_DEBUG_AUTH2, "%s: stcb %p key %u refcount acquire to %d\n", __func__, (void *)stcb, key_id, skey->refcount); } } void sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t key_id, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { sctp_sharedkey_t *skey; /* find the shared key */ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, key_id); /* decrement the ref count */ if (skey) { SCTPDBG(SCTP_DEBUG_AUTH2, "%s: stcb %p key %u refcount release to %d\n", __func__, (void *)stcb, key_id, skey->refcount); /* see if a notification should be 
generated */ if ((skey->refcount <= 2) && (skey->deactivated)) { /* notify ULP that key is no longer used */ sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb, key_id, 0, so_locked); SCTPDBG(SCTP_DEBUG_AUTH2, "%s: stcb %p key %u no longer used, %d\n", __func__, (void *)stcb, key_id, skey->refcount); } sctp_free_sharedkey(skey); } } static sctp_sharedkey_t * sctp_copy_sharedkey(const sctp_sharedkey_t *skey) { sctp_sharedkey_t *new_skey; if (skey == NULL) return (NULL); new_skey = sctp_alloc_sharedkey(); if (new_skey == NULL) return (NULL); if (skey->key != NULL) new_skey->key = sctp_set_key(skey->key->key, skey->key->keylen); else new_skey->key = NULL; new_skey->keyid = skey->keyid; return (new_skey); } int sctp_copy_skeylist(const struct sctp_keyhead *src, struct sctp_keyhead *dest) { sctp_sharedkey_t *skey, *new_skey; int count = 0; if ((src == NULL) || (dest == NULL)) return (0); LIST_FOREACH(skey, src, next) { new_skey = sctp_copy_sharedkey(skey); if (new_skey != NULL) { if (sctp_insert_sharedkey(dest, new_skey)) { sctp_free_sharedkey(new_skey); } else { count++; } } } return (count); } sctp_hmaclist_t * sctp_alloc_hmaclist(uint16_t num_hmacs) { sctp_hmaclist_t *new_list; int alloc_size; alloc_size = sizeof(*new_list) + num_hmacs * sizeof(new_list->hmac[0]); SCTP_MALLOC(new_list, sctp_hmaclist_t *, alloc_size, SCTP_M_AUTH_HL); if (new_list == NULL) { /* out of memory */ return (NULL); } new_list->max_algo = num_hmacs; new_list->num_algo = 0; return (new_list); } void sctp_free_hmaclist(sctp_hmaclist_t *list) { if (list != NULL) { SCTP_FREE(list, SCTP_M_AUTH_HL); list = NULL; } } int sctp_auth_add_hmacid(sctp_hmaclist_t *list, uint16_t hmac_id) { int i; if (list == NULL) return (-1); if (list->num_algo == list->max_algo) { SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: HMAC id list full, ignoring add %u\n", hmac_id); return (-1); } if ((hmac_id != SCTP_AUTH_HMAC_ID_SHA1) && (hmac_id != SCTP_AUTH_HMAC_ID_SHA256)) { return (-1); } /* Now is it already in the list */ for (i = 0; i < list->num_algo; i++) { if (list->hmac[i] == hmac_id) { /* already in list */ return (-1); } } SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: add HMAC id %u to list\n", hmac_id); list->hmac[list->num_algo++] = hmac_id; return (0); } sctp_hmaclist_t * sctp_copy_hmaclist(sctp_hmaclist_t *list) { sctp_hmaclist_t *new_list; int i; if (list == NULL) return (NULL); /* get a new list */ new_list = sctp_alloc_hmaclist(list->max_algo); if (new_list == NULL) return (NULL); /* copy it */ new_list->max_algo = list->max_algo; new_list->num_algo = list->num_algo; for (i = 0; i < list->num_algo; i++) new_list->hmac[i] = list->hmac[i]; return (new_list); } sctp_hmaclist_t * sctp_default_supported_hmaclist(void) { sctp_hmaclist_t *new_list; new_list = sctp_alloc_hmaclist(2); if (new_list == NULL) return (NULL); /* We prefer SHA256, so list it first */ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256); (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA1); return (new_list); } /*- * HMAC algos are listed in priority/preference order * find the best HMAC id to use for the peer based on local support */ uint16_t sctp_negotiate_hmacid(sctp_hmaclist_t *peer, sctp_hmaclist_t *local) { int i, j; if ((local == NULL) || (peer == NULL)) return (SCTP_AUTH_HMAC_ID_RSVD); for (i = 0; i < peer->num_algo; i++) { for (j = 0; j < local->num_algo; j++) { if (peer->hmac[i] == local->hmac[j]) { /* found the "best" one */ SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: negotiated peer HMAC id %u\n", peer->hmac[i]); return (peer->hmac[i]); } } } /* didn't find one! 
*/ return (SCTP_AUTH_HMAC_ID_RSVD); } /*- * serialize the HMAC algo list and return space used * caller must guarantee ptr has appropriate space */ int sctp_serialize_hmaclist(sctp_hmaclist_t *list, uint8_t *ptr) { int i; uint16_t hmac_id; if (list == NULL) return (0); for (i = 0; i < list->num_algo; i++) { hmac_id = htons(list->hmac[i]); memcpy(ptr, &hmac_id, sizeof(hmac_id)); ptr += sizeof(hmac_id); } return (list->num_algo * sizeof(hmac_id)); } int sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs, uint32_t num_hmacs) { uint32_t i; for (i = 0; i < num_hmacs; i++) { if (ntohs(hmacs->hmac_ids[i]) == SCTP_AUTH_HMAC_ID_SHA1) { return (0); } } return (-1); } sctp_authinfo_t * sctp_alloc_authinfo(void) { sctp_authinfo_t *new_authinfo; SCTP_MALLOC(new_authinfo, sctp_authinfo_t *, sizeof(*new_authinfo), SCTP_M_AUTH_IF); if (new_authinfo == NULL) { /* out of memory */ return (NULL); } memset(new_authinfo, 0, sizeof(*new_authinfo)); return (new_authinfo); } void sctp_free_authinfo(sctp_authinfo_t *authinfo) { if (authinfo == NULL) return; if (authinfo->random != NULL) sctp_free_key(authinfo->random); if (authinfo->peer_random != NULL) sctp_free_key(authinfo->peer_random); if (authinfo->assoc_key != NULL) sctp_free_key(authinfo->assoc_key); if (authinfo->recv_key != NULL) sctp_free_key(authinfo->recv_key); /* We are NOT dynamically allocating authinfo's right now... */ /* SCTP_FREE(authinfo, SCTP_M_AUTH_??); */ } uint32_t sctp_get_auth_chunk_len(uint16_t hmac_algo) { int size; size = sizeof(struct sctp_auth_chunk) + sctp_get_hmac_digest_len(hmac_algo); return (SCTP_SIZE32(size)); } uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo) { switch (hmac_algo) { case SCTP_AUTH_HMAC_ID_SHA1: return (SCTP_AUTH_DIGEST_LEN_SHA1); case SCTP_AUTH_HMAC_ID_SHA256: return (SCTP_AUTH_DIGEST_LEN_SHA256); default: /* unknown HMAC algorithm: can't do anything */ return (0); } /* end switch */ } static inline int sctp_get_hmac_block_len(uint16_t hmac_algo) { switch (hmac_algo) { case SCTP_AUTH_HMAC_ID_SHA1: return (64); case SCTP_AUTH_HMAC_ID_SHA256: return (64); case SCTP_AUTH_HMAC_ID_RSVD: default: /* unknown HMAC algorithm: can't do anything */ return (0); } /* end switch */ } static void sctp_hmac_init(uint16_t hmac_algo, sctp_hash_context_t *ctx) { switch (hmac_algo) { case SCTP_AUTH_HMAC_ID_SHA1: SCTP_SHA1_INIT(&ctx->sha1); break; case SCTP_AUTH_HMAC_ID_SHA256: SCTP_SHA256_INIT(&ctx->sha256); break; case SCTP_AUTH_HMAC_ID_RSVD: default: /* unknown HMAC algorithm: can't do anything */ return; } /* end switch */ } static void sctp_hmac_update(uint16_t hmac_algo, sctp_hash_context_t *ctx, uint8_t *text, uint32_t textlen) { switch (hmac_algo) { case SCTP_AUTH_HMAC_ID_SHA1: SCTP_SHA1_UPDATE(&ctx->sha1, text, textlen); break; case SCTP_AUTH_HMAC_ID_SHA256: SCTP_SHA256_UPDATE(&ctx->sha256, text, textlen); break; case SCTP_AUTH_HMAC_ID_RSVD: default: /* unknown HMAC algorithm: can't do anything */ return; } /* end switch */ } static void sctp_hmac_final(uint16_t hmac_algo, sctp_hash_context_t *ctx, uint8_t *digest) { switch (hmac_algo) { case SCTP_AUTH_HMAC_ID_SHA1: SCTP_SHA1_FINAL(digest, &ctx->sha1); break; case SCTP_AUTH_HMAC_ID_SHA256: SCTP_SHA256_FINAL(digest, &ctx->sha256); break; case SCTP_AUTH_HMAC_ID_RSVD: default: /* unknown HMAC algorithm: can't do anything */ return; } /* end switch */ } /*- * Keyed-Hashing for Message Authentication: FIPS 198 (RFC 2104) * * Compute the HMAC digest using the desired hash key, text, and HMAC * algorithm. 
Resulting digest is placed in 'digest' and digest length * is returned, if the HMAC was performed. * * WARNING: it is up to the caller to supply sufficient space to hold the * resultant digest. */ uint32_t sctp_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen, uint8_t *text, uint32_t textlen, uint8_t *digest) { uint32_t digestlen; uint32_t blocklen; sctp_hash_context_t ctx; uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX]; uint32_t i; /* sanity check the material and length */ if ((key == NULL) || (keylen == 0) || (text == NULL) || (textlen == 0) || (digest == NULL)) { /* can't do HMAC with empty key or text or digest store */ return (0); } /* validate the hmac algo and get the digest length */ digestlen = sctp_get_hmac_digest_len(hmac_algo); if (digestlen == 0) return (0); /* hash the key if it is longer than the hash block size */ blocklen = sctp_get_hmac_block_len(hmac_algo); if (keylen > blocklen) { sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, key, keylen); sctp_hmac_final(hmac_algo, &ctx, temp); /* set the hashed key as the key */ keylen = digestlen; key = temp; } /* initialize the inner/outer pads with the key and "append" zeroes */ memset(ipad, 0, blocklen); memset(opad, 0, blocklen); memcpy(ipad, key, keylen); memcpy(opad, key, keylen); /* XOR the key with ipad and opad values */ for (i = 0; i < blocklen; i++) { ipad[i] ^= 0x36; opad[i] ^= 0x5c; } /* perform inner hash */ sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen); sctp_hmac_update(hmac_algo, &ctx, text, textlen); sctp_hmac_final(hmac_algo, &ctx, temp); /* perform outer hash */ sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, opad, blocklen); sctp_hmac_update(hmac_algo, &ctx, temp, digestlen); sctp_hmac_final(hmac_algo, &ctx, digest); return (digestlen); } /* mbuf version */ uint32_t sctp_hmac_m(uint16_t hmac_algo, uint8_t *key, uint32_t keylen, struct mbuf *m, uint32_t m_offset, uint8_t *digest, uint32_t trailer) { uint32_t digestlen; uint32_t blocklen; sctp_hash_context_t ctx; uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX]; uint32_t i; struct mbuf *m_tmp; /* sanity check the material and length */ if ((key == NULL) || (keylen == 0) || (m == NULL) || (digest == NULL)) { /* can't do HMAC with empty key or text or digest store */ return (0); } /* validate the hmac algo and get the digest length */ digestlen = sctp_get_hmac_digest_len(hmac_algo); if (digestlen == 0) return (0); /* hash the key if it is longer than the hash block size */ blocklen = sctp_get_hmac_block_len(hmac_algo); if (keylen > blocklen) { sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, key, keylen); sctp_hmac_final(hmac_algo, &ctx, temp); /* set the hashed key as the key */ keylen = digestlen; key = temp; } /* initialize the inner/outer pads with the key and "append" zeroes */ memset(ipad, 0, blocklen); memset(opad, 0, blocklen); memcpy(ipad, key, keylen); memcpy(opad, key, keylen); /* XOR the key with ipad and opad values */ for (i = 0; i < blocklen; i++) { ipad[i] ^= 0x36; opad[i] ^= 0x5c; } /* perform inner hash */ sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen); /* find the correct starting mbuf and offset (get start of text) */ m_tmp = m; while ((m_tmp != NULL) && (m_offset >= (uint32_t)SCTP_BUF_LEN(m_tmp))) { m_offset -= SCTP_BUF_LEN(m_tmp); m_tmp = SCTP_BUF_NEXT(m_tmp); } /* now use the rest of the mbuf chain for 
the text */ while (m_tmp != NULL) { if ((SCTP_BUF_NEXT(m_tmp) == NULL) && trailer) { sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *)+m_offset, SCTP_BUF_LEN(m_tmp) - (trailer + m_offset)); } else { sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *)+m_offset, SCTP_BUF_LEN(m_tmp) - m_offset); } /* clear the offset since it's only for the first mbuf */ m_offset = 0; m_tmp = SCTP_BUF_NEXT(m_tmp); } sctp_hmac_final(hmac_algo, &ctx, temp); /* perform outer hash */ sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, opad, blocklen); sctp_hmac_update(hmac_algo, &ctx, temp, digestlen); sctp_hmac_final(hmac_algo, &ctx, digest); return (digestlen); } /* * computes the requested HMAC using a key struct (which may be modified if * the keylen exceeds the HMAC block len). */ uint32_t sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t *key, uint8_t *text, uint32_t textlen, uint8_t *digest) { uint32_t digestlen; uint32_t blocklen; sctp_hash_context_t ctx; uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX]; /* sanity check */ if ((key == NULL) || (text == NULL) || (textlen == 0) || (digest == NULL)) { /* can't do HMAC with empty key or text or digest store */ return (0); } /* validate the hmac algo and get the digest length */ digestlen = sctp_get_hmac_digest_len(hmac_algo); if (digestlen == 0) return (0); /* hash the key if it is longer than the hash block size */ blocklen = sctp_get_hmac_block_len(hmac_algo); if (key->keylen > blocklen) { sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen); sctp_hmac_final(hmac_algo, &ctx, temp); /* save the hashed key as the new key */ key->keylen = digestlen; memcpy(key->key, temp, key->keylen); } return (sctp_hmac(hmac_algo, key->key, key->keylen, text, textlen, digest)); } /* mbuf version */ uint32_t sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t *key, struct mbuf *m, uint32_t m_offset, uint8_t *digest) { uint32_t digestlen; uint32_t blocklen; sctp_hash_context_t ctx; uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX]; /* sanity check */ if ((key == NULL) || (m == NULL) || (digest == NULL)) { /* can't do HMAC with empty key or text or digest store */ return (0); } /* validate the hmac algo and get the digest length */ digestlen = sctp_get_hmac_digest_len(hmac_algo); if (digestlen == 0) return (0); /* hash the key if it is longer than the hash block size */ blocklen = sctp_get_hmac_block_len(hmac_algo); if (key->keylen > blocklen) { sctp_hmac_init(hmac_algo, &ctx); sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen); sctp_hmac_final(hmac_algo, &ctx, temp); /* save the hashed key as the new key */ key->keylen = digestlen; memcpy(key->key, temp, key->keylen); } return (sctp_hmac_m(hmac_algo, key->key, key->keylen, m, m_offset, digest, 0)); } int sctp_auth_is_supported_hmac(sctp_hmaclist_t *list, uint16_t id) { int i; if ((list == NULL) || (id == SCTP_AUTH_HMAC_ID_RSVD)) return (0); for (i = 0; i < list->num_algo; i++) if (list->hmac[i] == id) return (1); /* not in the list */ return (0); } /*- * clear any cached key(s) if they match the given key id on an association. * the cached key(s) will be recomputed and re-cached at next use. 
* ASSUMES TCB_LOCK is already held */ void sctp_clear_cachedkeys(struct sctp_tcb *stcb, uint16_t keyid) { if (stcb == NULL) return; if (keyid == stcb->asoc.authinfo.assoc_keyid) { sctp_free_key(stcb->asoc.authinfo.assoc_key); stcb->asoc.authinfo.assoc_key = NULL; } if (keyid == stcb->asoc.authinfo.recv_keyid) { sctp_free_key(stcb->asoc.authinfo.recv_key); stcb->asoc.authinfo.recv_key = NULL; } } /*- * clear any cached key(s) if they match the given key id for all assocs on * an endpoint. * ASSUMES INP_WLOCK is already held */ void sctp_clear_cachedkeys_ep(struct sctp_inpcb *inp, uint16_t keyid) { struct sctp_tcb *stcb; if (inp == NULL) return; /* clear the cached keys on all assocs on this instance */ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); sctp_clear_cachedkeys(stcb, keyid); SCTP_TCB_UNLOCK(stcb); } } /*- * delete a shared key from an association * ASSUMES TCB_LOCK is already held */ int sctp_delete_sharedkey(struct sctp_tcb *stcb, uint16_t keyid) { sctp_sharedkey_t *skey; if (stcb == NULL) return (-1); /* is the keyid the assoc active sending key */ if (keyid == stcb->asoc.authinfo.active_keyid) return (-1); /* does the key exist? */ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid); if (skey == NULL) return (-1); /* are there other refcount holders on the key? */ if (skey->refcount > 1) return (-1); /* remove it */ LIST_REMOVE(skey, next); sctp_free_sharedkey(skey); /* frees skey->key as well */ /* clear any cached keys */ sctp_clear_cachedkeys(stcb, keyid); return (0); } /*- * deletes a shared key from the endpoint * ASSUMES INP_WLOCK is already held */ int sctp_delete_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid) { sctp_sharedkey_t *skey; if (inp == NULL) return (-1); /* is the keyid the active sending key on the endpoint */ if (keyid == inp->sctp_ep.default_keyid) return (-1); /* does the key exist? */ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid); if (skey == NULL) return (-1); /* endpoint keys are not refcounted */ /* remove it */ LIST_REMOVE(skey, next); sctp_free_sharedkey(skey); /* frees skey->key as well */ /* clear any cached keys */ sctp_clear_cachedkeys_ep(inp, keyid); return (0); } /*- * set the active key on an association * ASSUMES TCB_LOCK is already held */ int sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid) { sctp_sharedkey_t *skey = NULL; /* find the key on the assoc */ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid); if (skey == NULL) { /* that key doesn't exist */ return (-1); } if ((skey->deactivated) && (skey->refcount > 1)) { /* can't reactivate a deactivated key with other refcounts */ return (-1); } /* set the (new) active key */ stcb->asoc.authinfo.active_keyid = keyid; /* reset the deactivated flag */ skey->deactivated = 0; return (0); } /*- * set the active key on an endpoint * ASSUMES INP_WLOCK is already held */ int sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid) { sctp_sharedkey_t *skey; /* find the key */ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid); if (skey == NULL) { /* that key doesn't exist */ return (-1); } inp->sctp_ep.default_keyid = keyid; return (0); } /*- * deactivates a shared key from the association * ASSUMES INP_WLOCK is already held */ int sctp_deact_sharedkey(struct sctp_tcb *stcb, uint16_t keyid) { sctp_sharedkey_t *skey; if (stcb == NULL) return (-1); /* is the keyid the assoc active sending key */ if (keyid == stcb->asoc.authinfo.active_keyid) return (-1); /* does the key exist? 
*/ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid); if (skey == NULL) return (-1); /* are there other refcount holders on the key? */ if (skey->refcount == 1) { /* no other users, send a notification for this key */ sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb, keyid, 0, SCTP_SO_LOCKED); } /* mark the key as deactivated */ skey->deactivated = 1; return (0); } /*- * deactivates a shared key from the endpoint * ASSUMES INP_WLOCK is already held */ int sctp_deact_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid) { sctp_sharedkey_t *skey; if (inp == NULL) return (-1); /* is the keyid the active sending key on the endpoint */ if (keyid == inp->sctp_ep.default_keyid) return (-1); /* does the key exist? */ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid); if (skey == NULL) return (-1); /* endpoint keys are not refcounted */ /* remove it */ LIST_REMOVE(skey, next); sctp_free_sharedkey(skey); /* frees skey->key as well */ return (0); } /* * get local authentication parameters from cookie (from INIT-ACK) */ void sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m, uint32_t offset, uint32_t length) { struct sctp_paramhdr *phdr, tmp_param; uint16_t plen, ptype; uint8_t random_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_random *p_random = NULL; uint16_t random_len = 0; uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_hmac_algo *hmacs = NULL; uint16_t hmacs_len = 0; uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_chunk_list *chunks = NULL; uint16_t num_chunks = 0; sctp_key_t *new_key; uint32_t keylen; /* convert to upper bound */ length += offset; phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr), (uint8_t *)&tmp_param); while (phdr != NULL) { ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); if ((plen == 0) || (offset + plen > length)) break; if (ptype == SCTP_RANDOM) { if (plen > sizeof(random_store)) break; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)random_store, plen); if (phdr == NULL) return; /* save the random and length for the key */ p_random = (struct sctp_auth_random *)phdr; random_len = plen - sizeof(*p_random); } else if (ptype == SCTP_HMAC_LIST) { uint16_t num_hmacs; uint16_t i; if (plen > sizeof(hmacs_store)) break; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)hmacs_store, plen); if (phdr == NULL) return; /* save the hmacs list and num for the key */ hmacs = (struct sctp_auth_hmac_algo *)phdr; hmacs_len = plen - sizeof(*hmacs); num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]); if (stcb->asoc.local_hmacs != NULL) sctp_free_hmaclist(stcb->asoc.local_hmacs); stcb->asoc.local_hmacs = sctp_alloc_hmaclist(num_hmacs); if (stcb->asoc.local_hmacs != NULL) { for (i = 0; i < num_hmacs; i++) { (void)sctp_auth_add_hmacid(stcb->asoc.local_hmacs, ntohs(hmacs->hmac_ids[i])); } } } else if (ptype == SCTP_CHUNK_LIST) { int i; if (plen > sizeof(chunks_store)) break; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)chunks_store, plen); if (phdr == NULL) return; chunks = (struct sctp_auth_chunk_list *)phdr; num_chunks = plen - sizeof(*chunks); /* save chunks list and num for the key */ if (stcb->asoc.local_auth_chunks != NULL) sctp_clear_chunklist(stcb->asoc.local_auth_chunks); else stcb->asoc.local_auth_chunks = sctp_alloc_chunklist(); for (i = 0; i < num_chunks; i++) { (void)sctp_auth_add_chunk(chunks->chunk_types[i], stcb->asoc.local_auth_chunks); } } /* get next parameter */ offset += SCTP_SIZE32(plen); if (offset + sizeof(struct 
sctp_paramhdr) > length) break; phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr), (uint8_t *)&tmp_param); } /* concatenate the full random key */ keylen = sizeof(*p_random) + random_len + sizeof(*hmacs) + hmacs_len; if (chunks != NULL) { keylen += sizeof(*chunks) + num_chunks; } new_key = sctp_alloc_key(keylen); if (new_key != NULL) { /* copy in the RANDOM */ if (p_random != NULL) { keylen = sizeof(*p_random) + random_len; memcpy(new_key->key, p_random, keylen); } else { keylen = 0; } /* append in the AUTH chunks */ if (chunks != NULL) { memcpy(new_key->key + keylen, chunks, sizeof(*chunks) + num_chunks); keylen += sizeof(*chunks) + num_chunks; } /* append in the HMACs */ if (hmacs != NULL) { memcpy(new_key->key + keylen, hmacs, sizeof(*hmacs) + hmacs_len); } } if (stcb->asoc.authinfo.random != NULL) sctp_free_key(stcb->asoc.authinfo.random); stcb->asoc.authinfo.random = new_key; stcb->asoc.authinfo.random_len = random_len; sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid); sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid); /* negotiate what HMAC to use for the peer */ stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs, stcb->asoc.local_hmacs); /* copy defaults from the endpoint */ /* FIX ME: put in cookie? */ stcb->asoc.authinfo.active_keyid = stcb->sctp_ep->sctp_ep.default_keyid; /* copy out the shared key list (by reference) from the endpoint */ (void)sctp_copy_skeylist(&stcb->sctp_ep->sctp_ep.shared_keys, &stcb->asoc.shared_keys); } /* * compute and fill in the HMAC digest for a packet */ void sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset, struct sctp_auth_chunk *auth, struct sctp_tcb *stcb, uint16_t keyid) { uint32_t digestlen; sctp_sharedkey_t *skey; sctp_key_t *key; if ((stcb == NULL) || (auth == NULL)) return; /* zero the digest + chunk padding */ digestlen = sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id); memset(auth->hmac, 0, SCTP_SIZE32(digestlen)); /* is the desired key cached? 
*/ if ((keyid != stcb->asoc.authinfo.assoc_keyid) || (stcb->asoc.authinfo.assoc_key == NULL)) { if (stcb->asoc.authinfo.assoc_key != NULL) { /* free the old cached key */ sctp_free_key(stcb->asoc.authinfo.assoc_key); } skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid); /* the only way skey is NULL is if null key id 0 is used */ if (skey != NULL) key = skey->key; else key = NULL; /* compute a new assoc key and cache it */ stcb->asoc.authinfo.assoc_key = sctp_compute_hashkey(stcb->asoc.authinfo.random, stcb->asoc.authinfo.peer_random, key); stcb->asoc.authinfo.assoc_keyid = keyid; SCTPDBG(SCTP_DEBUG_AUTH1, "caching key id %u\n", stcb->asoc.authinfo.assoc_keyid); #ifdef SCTP_DEBUG if (SCTP_AUTH_DEBUG) sctp_print_key(stcb->asoc.authinfo.assoc_key, "Assoc Key"); #endif } /* set in the active key id */ auth->shared_key_id = htons(keyid); /* compute and fill in the digest */ (void)sctp_compute_hmac_m(stcb->asoc.peer_hmac_id, stcb->asoc.authinfo.assoc_key, m, auth_offset, auth->hmac); } static void sctp_zero_m(struct mbuf *m, uint32_t m_offset, uint32_t size) { struct mbuf *m_tmp; uint8_t *data; /* sanity check */ if (m == NULL) return; /* find the correct starting mbuf and offset (get start position) */ m_tmp = m; while ((m_tmp != NULL) && (m_offset >= (uint32_t)SCTP_BUF_LEN(m_tmp))) { m_offset -= SCTP_BUF_LEN(m_tmp); m_tmp = SCTP_BUF_NEXT(m_tmp); } /* now use the rest of the mbuf chain */ while ((m_tmp != NULL) && (size > 0)) { data = mtod(m_tmp, uint8_t *)+m_offset; if (size > (uint32_t)(SCTP_BUF_LEN(m_tmp) - m_offset)) { memset(data, 0, SCTP_BUF_LEN(m_tmp) - m_offset); size -= SCTP_BUF_LEN(m_tmp) - m_offset; } else { memset(data, 0, size); size = 0; } /* clear the offset since it's only for the first mbuf */ m_offset = 0; m_tmp = SCTP_BUF_NEXT(m_tmp); } } /*- * process the incoming Authentication chunk * return codes: * -1 on any authentication error * 0 on authentication verification */ int sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth, struct mbuf *m, uint32_t offset) { uint16_t chunklen; uint16_t shared_key_id; uint16_t hmac_id; sctp_sharedkey_t *skey; uint32_t digestlen; uint8_t digest[SCTP_AUTH_DIGEST_LEN_MAX]; uint8_t computed_digest[SCTP_AUTH_DIGEST_LEN_MAX]; /* auth is checked for NULL by caller */ chunklen = ntohs(auth->ch.chunk_length); if (chunklen < sizeof(*auth)) { SCTP_STAT_INCR(sctps_recvauthfailed); return (-1); } SCTP_STAT_INCR(sctps_recvauth); /* get the auth params */ shared_key_id = ntohs(auth->shared_key_id); hmac_id = ntohs(auth->hmac_id); SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP AUTH Chunk: shared key %u, HMAC id %u\n", shared_key_id, hmac_id); /* is the indicated HMAC supported? 
*/ if (!sctp_auth_is_supported_hmac(stcb->asoc.local_hmacs, hmac_id)) { struct mbuf *op_err; struct sctp_error_auth_invalid_hmac *cause; SCTP_STAT_INCR(sctps_recvivalhmacid); SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP Auth: unsupported HMAC id %u\n", hmac_id); /* * report this in an Error Chunk: Unsupported HMAC * Identifier */ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_auth_invalid_hmac), 0, M_NOWAIT, 1, MT_HEADER); if (op_err != NULL) { /* pre-reserve some space */ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr)); /* fill in the error */ cause = mtod(op_err, struct sctp_error_auth_invalid_hmac *); cause->cause.code = htons(SCTP_CAUSE_UNSUPPORTED_HMACID); cause->cause.length = htons(sizeof(struct sctp_error_auth_invalid_hmac)); cause->hmac_id = ntohs(hmac_id); SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_auth_invalid_hmac); /* queue it */ sctp_queue_op_err(stcb, op_err); } return (-1); } /* get the indicated shared key, if available */ if ((stcb->asoc.authinfo.recv_key == NULL) || (stcb->asoc.authinfo.recv_keyid != shared_key_id)) { /* find the shared key on the assoc first */ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, shared_key_id); /* if the shared key isn't found, discard the chunk */ if (skey == NULL) { SCTP_STAT_INCR(sctps_recvivalkeyid); SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP Auth: unknown key id %u\n", shared_key_id); return (-1); } /* generate a notification if this is a new key id */ if (stcb->asoc.authinfo.recv_keyid != shared_key_id) /* * sctp_ulp_notify(SCTP_NOTIFY_AUTH_NEW_KEY, stcb, * shared_key_id, (void * *)stcb->asoc.authinfo.recv_keyid); */ sctp_notify_authentication(stcb, SCTP_AUTH_NEW_KEY, shared_key_id, stcb->asoc.authinfo.recv_keyid, SCTP_SO_NOT_LOCKED); /* compute a new recv assoc key and cache it */ if (stcb->asoc.authinfo.recv_key != NULL) sctp_free_key(stcb->asoc.authinfo.recv_key); stcb->asoc.authinfo.recv_key = sctp_compute_hashkey(stcb->asoc.authinfo.random, stcb->asoc.authinfo.peer_random, skey->key); stcb->asoc.authinfo.recv_keyid = shared_key_id; #ifdef SCTP_DEBUG if (SCTP_AUTH_DEBUG) sctp_print_key(stcb->asoc.authinfo.recv_key, "Recv Key"); #endif } /* validate the digest length */ digestlen = sctp_get_hmac_digest_len(hmac_id); if (chunklen < (sizeof(*auth) + digestlen)) { /* invalid digest length */ SCTP_STAT_INCR(sctps_recvauthfailed); SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP Auth: chunk too short for HMAC\n"); return (-1); } /* save a copy of the digest, zero the pseudo header, and validate */ memcpy(digest, auth->hmac, digestlen); sctp_zero_m(m, offset + sizeof(*auth), SCTP_SIZE32(digestlen)); (void)sctp_compute_hmac_m(hmac_id, stcb->asoc.authinfo.recv_key, m, offset, computed_digest); /* compare the computed digest with the one in the AUTH chunk */ if (timingsafe_bcmp(digest, computed_digest, digestlen) != 0) { SCTP_STAT_INCR(sctps_recvauthfailed); SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP Auth: HMAC digest check failed\n"); return (-1); } return (0); } /* * Generate NOTIFICATION */ void sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication, uint16_t keyid, uint16_t alt_keyid, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { struct mbuf *m_notify; struct sctp_authkey_event *auth; struct sctp_queued_to_read *control; if ((stcb == NULL) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) ) { /* If the socket is gone we are out of here */ return; } if 
(sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_AUTHEVNT)) /* event not enabled */ return; m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_authkey_event), 0, M_NOWAIT, 1, MT_HEADER); if (m_notify == NULL) /* no space left */ return; SCTP_BUF_LEN(m_notify) = 0; auth = mtod(m_notify, struct sctp_authkey_event *); memset(auth, 0, sizeof(struct sctp_authkey_event)); auth->auth_type = SCTP_AUTHENTICATION_EVENT; auth->auth_flags = 0; auth->auth_length = sizeof(*auth); auth->auth_keynumber = keyid; auth->auth_altkeynumber = alt_keyid; auth->auth_indication = indication; auth->auth_assoc_id = sctp_get_associd(stcb); SCTP_BUF_LEN(m_notify) = sizeof(*auth); SCTP_BUF_NEXT(m_notify) = NULL; /* append to socket */ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination, 0, 0, stcb->asoc.context, 0, 0, 0, m_notify); if (control == NULL) { /* no memory */ sctp_m_freem(m_notify); return; } control->length = SCTP_BUF_LEN(m_notify); control->spec_flags = M_NOTIFICATION; /* not that we need this */ control->tail_mbuf = m_notify; sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, so_locked); } /*- * validates the AUTHentication related parameters in an INIT/INIT-ACK * Note: currently only used for INIT as INIT-ACK is handled inline * with sctp_load_addresses_from_init() */ int sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit) { struct sctp_paramhdr *phdr, param_buf; uint16_t ptype, plen; int peer_supports_asconf = 0; int peer_supports_auth = 0; int got_random = 0, got_hmacs = 0, got_chklist = 0; uint8_t saw_asconf = 0; uint8_t saw_asconf_ack = 0; /* go through each of the params. */ phdr = sctp_get_next_param(m, offset, ¶m_buf, sizeof(param_buf)); while (phdr) { ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); if (offset + plen > limit) { break; } if (plen < sizeof(struct sctp_paramhdr)) { break; } if (ptype == SCTP_SUPPORTED_CHUNK_EXT) { /* A supported extension chunk */ struct sctp_supported_chunk_types_param *pr_supported; uint8_t local_store[SCTP_SMALL_CHUNK_STORE]; int num_ent, i; if (plen > sizeof(local_store)) { break; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&local_store, plen); if (phdr == NULL) { return (-1); } pr_supported = (struct sctp_supported_chunk_types_param *)phdr; num_ent = plen - sizeof(struct sctp_paramhdr); for (i = 0; i < num_ent; i++) { switch (pr_supported->chunk_types[i]) { case SCTP_ASCONF: case SCTP_ASCONF_ACK: peer_supports_asconf = 1; break; default: /* one we don't care about */ break; } } } else if (ptype == SCTP_RANDOM) { /* enforce the random length */ if (plen != (sizeof(struct sctp_auth_random) + SCTP_AUTH_RANDOM_SIZE_REQUIRED)) { SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid RANDOM len\n"); return (-1); } got_random = 1; } else if (ptype == SCTP_HMAC_LIST) { struct sctp_auth_hmac_algo *hmacs; uint8_t store[SCTP_PARAM_BUFFER_SIZE]; int num_hmacs; if (plen > sizeof(store)) { break; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)store, plen); if (phdr == NULL) { return (-1); } hmacs = (struct sctp_auth_hmac_algo *)phdr; num_hmacs = (plen - sizeof(*hmacs)) / sizeof(hmacs->hmac_ids[0]); /* validate the hmac list */ if (sctp_verify_hmac_param(hmacs, num_hmacs)) { SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid HMAC param\n"); return (-1); } got_hmacs = 1; } else if (ptype == SCTP_CHUNK_LIST) { struct sctp_auth_chunk_list *chunks; uint8_t chunks_store[SCTP_SMALL_CHUNK_STORE]; int i, num_chunks; if (plen > sizeof(chunks_store)) { 
break; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)chunks_store, plen); if (phdr == NULL) { return (-1); } /*- * Flip through the list and mark that the * peer supports asconf/asconf_ack. */ chunks = (struct sctp_auth_chunk_list *)phdr; num_chunks = plen - sizeof(*chunks); for (i = 0; i < num_chunks; i++) { /* record asconf/asconf-ack if listed */ if (chunks->chunk_types[i] == SCTP_ASCONF) saw_asconf = 1; if (chunks->chunk_types[i] == SCTP_ASCONF_ACK) saw_asconf_ack = 1; } if (num_chunks) got_chklist = 1; } offset += SCTP_SIZE32(plen); if (offset >= limit) { break; } phdr = sctp_get_next_param(m, offset, ¶m_buf, sizeof(param_buf)); } /* validate authentication required parameters */ if (got_random && got_hmacs) { peer_supports_auth = 1; } else { peer_supports_auth = 0; } if (!peer_supports_auth && got_chklist) { SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: peer sent chunk list w/o AUTH\n"); return (-1); } if (peer_supports_asconf && !peer_supports_auth) { SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: peer supports ASCONF but not AUTH\n"); return (-1); } else if ((peer_supports_asconf) && (peer_supports_auth) && ((saw_asconf == 0) || (saw_asconf_ack == 0))) { return (-2); } return (0); } void sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb) { uint16_t chunks_len = 0; uint16_t hmacs_len = 0; uint16_t random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT; sctp_key_t *new_key; uint16_t keylen; /* initialize hmac list from endpoint */ stcb->asoc.local_hmacs = sctp_copy_hmaclist(inp->sctp_ep.local_hmacs); if (stcb->asoc.local_hmacs != NULL) { hmacs_len = stcb->asoc.local_hmacs->num_algo * sizeof(stcb->asoc.local_hmacs->hmac[0]); } /* initialize auth chunks list from endpoint */ stcb->asoc.local_auth_chunks = sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks); if (stcb->asoc.local_auth_chunks != NULL) { int i; for (i = 0; i < 256; i++) { if (stcb->asoc.local_auth_chunks->chunks[i]) chunks_len++; } } /* copy defaults from the endpoint */ stcb->asoc.authinfo.active_keyid = inp->sctp_ep.default_keyid; /* copy out the shared key list (by reference) from the endpoint */ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys, &stcb->asoc.shared_keys); /* now set the concatenated key (random + chunks + hmacs) */ /* key includes parameter headers */ keylen = (3 * sizeof(struct sctp_paramhdr)) + random_len + chunks_len + hmacs_len; new_key = sctp_alloc_key(keylen); if (new_key != NULL) { struct sctp_paramhdr *ph; int plen; /* generate and copy in the RANDOM */ ph = (struct sctp_paramhdr *)new_key->key; ph->param_type = htons(SCTP_RANDOM); plen = sizeof(*ph) + random_len; ph->param_length = htons(plen); SCTP_READ_RANDOM(new_key->key + sizeof(*ph), random_len); keylen = plen; /* append in the AUTH chunks */ /* NOTE: currently we always have chunks to list */ ph = (struct sctp_paramhdr *)(new_key->key + keylen); ph->param_type = htons(SCTP_CHUNK_LIST); plen = sizeof(*ph) + chunks_len; ph->param_length = htons(plen); keylen += sizeof(*ph); if (stcb->asoc.local_auth_chunks) { int i; for (i = 0; i < 256; i++) { if (stcb->asoc.local_auth_chunks->chunks[i]) new_key->key[keylen++] = i; } } /* append in the HMACs */ ph = (struct sctp_paramhdr *)(new_key->key + keylen); ph->param_type = htons(SCTP_HMAC_LIST); plen = sizeof(*ph) + hmacs_len; ph->param_length = htons(plen); keylen += sizeof(*ph); (void)sctp_serialize_hmaclist(stcb->asoc.local_hmacs, new_key->key + keylen); } if (stcb->asoc.authinfo.random != NULL) sctp_free_key(stcb->asoc.authinfo.random); stcb->asoc.authinfo.random = new_key; 
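/* Descriptive note: the key just built concatenates the RANDOM, CHUNK_LIST, and HMAC_LIST parameters (each with its parameter header); it is cached in authinfo.random as the local key material used when association keys are computed. */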
stcb->asoc.authinfo.random_len = random_len; } Index: projects/clang900-import/sys/netinet/tcp_stacks/rack.c =================================================================== --- projects/clang900-import/sys/netinet/tcp_stacks/rack.c (revision 352536) +++ projects/clang900-import/sys/netinet/tcp_stacks/rack.c (revision 352537) @@ -1,9246 +1,9261 @@ /*- * Copyright (c) 2016-2019 * Netflix Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #ifdef TCP_HHOOK #include #endif #include #include #include #include #include #include /* for proc0 declaration */ #ifdef NETFLIX_STATS #include #endif #include #include #include #include #include #ifdef NETFLIX_STATS #include /* Must come after qmath.h and tree.h */ #endif #include #include #include #include #include #include #include #include #define TCPSTATES /* for logging */ #include #include #include #include #include /* required for icmp_var.h */ #include /* for ICMP_BANDLIM */ #include #include #include #include #define TCPOUTFLAGS #include #include #include #include #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ #ifdef TCP_OFFLOAD #include #endif #ifdef INET6 #include #endif #include #if defined(IPSEC) || defined(IPSEC_SUPPORT) #include #include #endif /* IPSEC */ #include #include #include #ifdef MAC #include #endif #include "sack_filter.h" #include "tcp_rack.h" #include "rack_bbr_common.h" uma_zone_t rack_zone; uma_zone_t rack_pcb_zone; #ifndef TICKS2SBT #define TICKS2SBT(__t) (tick_sbt * ((sbintime_t)(__t))) #endif struct sysctl_ctx_list rack_sysctl_ctx; struct sysctl_oid *rack_sysctl_root; #define CUM_ACKED 1 #define SACKED 2 /* * The RACK module incorporates a number of * TCP ideas that have been put out into the IETF * over the last few years: * - Matt Mathis's Rate Halving which slowly drops * the congestion window so that the ack clock can * be maintained during a recovery. * - Yuchung Cheng's RACK TCP (for which its named) that * will stop us using the number of dup acks and instead * use time as the gage of when we retransmit. * - Reorder Detection of RFC4737 and the Tail-Loss probe draft * of Dukkipati et.al. 
* RACK depends on SACK, so if an endpoint arrives that * cannot do SACK the state machine below will shuttle the * connection back to using the "default" TCP stack that is * in FreeBSD. * * To implement RACK the original TCP stack was first decomposed * into a functional state machine with individual states * for each of the possible TCP connection states. The do_segment * function's role in life is to mandate that the connection supports SACK * initially and then assure that the RACK state matches the connection * state before calling the state's do_segment function. Each * state is simplified due to the fact that the original do_segment * has been decomposed and we *know* what state we are in (no * switches on the state) and all tests for SACK are gone. This * greatly simplifies what each state does. * * TCP output is also overwritten with a new version since it * must maintain the new rack scoreboard. * */ static int32_t rack_precache = 1; static int32_t rack_tlp_thresh = 1; static int32_t rack_reorder_thresh = 2; static int32_t rack_reorder_fade = 60000; /* 0 - never fade, def 60,000 * - 60 seconds */ static int32_t rack_pkt_delay = 1; static int32_t rack_inc_var = 0;/* For TLP */ static int32_t rack_reduce_largest_on_idle = 0; static int32_t rack_min_pace_time = 0; static int32_t rack_min_pace_time_seg_req=6; static int32_t rack_early_recovery = 1; static int32_t rack_early_recovery_max_seg = 6; static int32_t rack_send_a_lot_in_prr = 1; static int32_t rack_min_to = 1; /* Number of ms minimum timeout */ static int32_t rack_tlp_in_recovery = 1; /* Can we do TLP in recovery? */ static int32_t rack_verbose_logging = 0; static int32_t rack_ignore_data_after_close = 1; static int32_t rack_map_entries_limit = 1024; static int32_t rack_map_split_limit = 256; /* * Currently regular TCP has an rto_min of 30ms; * the backoff goes 12 times, so that ends up * being a total of 122.850 seconds before a * connection is killed.
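* RACK bounds its own retransmit timer separately, using the rack_rto_min and rack_rto_max values defined just below.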
*/ static int32_t rack_tlp_min = 10; static int32_t rack_rto_min = 30; /* 30ms same as main freebsd */ static int32_t rack_rto_max = 30000; /* 30 seconds */ static const int32_t rack_free_cache = 2; static int32_t rack_hptsi_segments = 40; static int32_t rack_rate_sample_method = USE_RTT_LOW; static int32_t rack_pace_every_seg = 1; static int32_t rack_delayed_ack_time = 200; /* 200ms */ static int32_t rack_slot_reduction = 4; static int32_t rack_lower_cwnd_at_tlp = 0; static int32_t rack_use_proportional_reduce = 0; static int32_t rack_proportional_rate = 10; static int32_t rack_tlp_max_resend = 2; static int32_t rack_limited_retran = 0; static int32_t rack_always_send_oldest = 0; static int32_t rack_sack_block_limit = 128; static int32_t rack_use_sack_filter = 1; static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE; /* Rack specific counters */ counter_u64_t rack_badfr; counter_u64_t rack_badfr_bytes; counter_u64_t rack_rtm_prr_retran; counter_u64_t rack_rtm_prr_newdata; counter_u64_t rack_timestamp_mismatch; counter_u64_t rack_reorder_seen; counter_u64_t rack_paced_segments; counter_u64_t rack_unpaced_segments; counter_u64_t rack_saw_enobuf; counter_u64_t rack_saw_enetunreach; /* Tail loss probe counters */ counter_u64_t rack_tlp_tot; counter_u64_t rack_tlp_newdata; counter_u64_t rack_tlp_retran; counter_u64_t rack_tlp_retran_bytes; counter_u64_t rack_tlp_retran_fail; counter_u64_t rack_to_tot; counter_u64_t rack_to_arm_rack; counter_u64_t rack_to_arm_tlp; counter_u64_t rack_to_alloc; counter_u64_t rack_to_alloc_hard; counter_u64_t rack_to_alloc_emerg; counter_u64_t rack_to_alloc_limited; counter_u64_t rack_alloc_limited_conns; counter_u64_t rack_split_limited; counter_u64_t rack_sack_proc_all; counter_u64_t rack_sack_proc_short; counter_u64_t rack_sack_proc_restart; counter_u64_t rack_runt_sacks; counter_u64_t rack_used_tlpmethod; counter_u64_t rack_used_tlpmethod2; counter_u64_t rack_enter_tlp_calc; counter_u64_t rack_input_idle_reduces; counter_u64_t rack_tlp_does_nada; /* Temp CPU counters */ counter_u64_t rack_find_high; counter_u64_t rack_progress_drops; counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE]; counter_u64_t rack_opts_arry[RACK_OPTS_SIZE]; /* * This was originally defined in tcp_timer.c, but is now reproduced here given * the unification of the SYN and non-SYN retransmit timer exponents combined * with wanting to retain previous behaviour for previously deployed stack * versions. 
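* The table below stays at 1 for its first five entries and then doubles, capping at 64.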
*/ int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; static void rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, int event, int line); static int rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val); static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static void rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery); static struct rack_sendmap *rack_alloc(struct tcp_rack *rack); static struct rack_sendmap *rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type); static struct rack_sendmap * rack_check_recovery_mode(struct tcpcb *tp, uint32_t tsused); static void rack_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); static void rack_counter_destroy(void); static int rack_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); static int32_t rack_ctor(void *mem, int32_t size, void *arg, int32_t how); static void rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos); static void rack_dtor(void *mem, int32_t size, void *arg); static void rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm, uint32_t t, uint32_t cts); static struct rack_sendmap * rack_find_high_nonack(struct tcp_rack *rack, struct rack_sendmap *rsm); static struct rack_sendmap *rack_find_lowest_rsm(struct tcp_rack *rack); static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm); static void rack_fini(struct tcpcb *tp, int32_t tcb_is_purged); static int rack_get_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack); static int32_t rack_handoff_ok(struct tcpcb *tp); static int32_t rack_init(struct tcpcb *tp); static void rack_init_sysctls(void); static void rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th); static void rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, uint32_t seq_out, uint8_t th_flags, int32_t err, uint32_t ts, uint8_t pass, struct rack_sendmap *hintrsm); static void rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm); static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num); static int32_t rack_output(struct tcpcb *tp); static void rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, int32_t nxt_pkt, struct timeval *tv); static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts); static void rack_post_recovery(struct tcpcb *tp, struct tcphdr *th); static void rack_remxt_tmr(struct tcpcb *tp); static int rack_set_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack); static void rack_set_state(struct tcpcb *tp, struct tcp_rack *rack); static int32_t rack_stopall(struct tcpcb *tp); static void rack_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta); static int32_t rack_timer_active(struct tcpcb *tp, uint32_t timer_type); static void 
rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line); static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type); static uint32_t rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t ts, int32_t * lenp); static void rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t ts); static int rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type); static int32_t tcp_addrack(module_t mod, int32_t type, void *data); static void rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val); static int rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static void rack_do_drop(struct mbuf *m, struct tcpcb *tp); static void rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val); static void rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t rstreason, int32_t tlen); static int rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t nxt_pkt); static int rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val); static int rack_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp); struct rack_sendmap * tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused); static void tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt); static void tcp_rack_partialack(struct tcpcb *tp, struct tcphdr *th); static int rack_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t tlen, int32_t thflags, int32_t * ret_val); int32_t rack_clear_counter=0; static int 
sysctl_rack_clear(SYSCTL_HANDLER_ARGS) { uint32_t stat; int32_t error; error = SYSCTL_OUT(req, &rack_clear_counter, sizeof(uint32_t)); if (error || req->newptr == NULL) return error; error = SYSCTL_IN(req, &stat, sizeof(uint32_t)); if (error) return (error); if (stat == 1) { #ifdef INVARIANTS printf("Clearing RACK counters\n"); #endif counter_u64_zero(rack_badfr); counter_u64_zero(rack_badfr_bytes); counter_u64_zero(rack_rtm_prr_retran); counter_u64_zero(rack_rtm_prr_newdata); counter_u64_zero(rack_timestamp_mismatch); counter_u64_zero(rack_reorder_seen); counter_u64_zero(rack_tlp_tot); counter_u64_zero(rack_tlp_newdata); counter_u64_zero(rack_tlp_retran); counter_u64_zero(rack_tlp_retran_bytes); counter_u64_zero(rack_tlp_retran_fail); counter_u64_zero(rack_to_tot); counter_u64_zero(rack_to_arm_rack); counter_u64_zero(rack_to_arm_tlp); counter_u64_zero(rack_paced_segments); counter_u64_zero(rack_unpaced_segments); counter_u64_zero(rack_saw_enobuf); counter_u64_zero(rack_saw_enetunreach); counter_u64_zero(rack_to_alloc_hard); counter_u64_zero(rack_to_alloc_emerg); counter_u64_zero(rack_sack_proc_all); counter_u64_zero(rack_sack_proc_short); counter_u64_zero(rack_sack_proc_restart); counter_u64_zero(rack_to_alloc); counter_u64_zero(rack_to_alloc_limited); counter_u64_zero(rack_alloc_limited_conns); counter_u64_zero(rack_split_limited); counter_u64_zero(rack_find_high); counter_u64_zero(rack_runt_sacks); counter_u64_zero(rack_used_tlpmethod); counter_u64_zero(rack_used_tlpmethod2); counter_u64_zero(rack_enter_tlp_calc); counter_u64_zero(rack_progress_drops); counter_u64_zero(rack_tlp_does_nada); } rack_clear_counter = 0; return (0); } static void rack_init_sysctls() { SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "map_limit", CTLFLAG_RW, &rack_map_entries_limit , 1024, "Is there a limit on how big the sendmap can grow? 
"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "map_splitlimit", CTLFLAG_RW, &rack_map_split_limit , 256, "Is there a limit on how much splitting a peer can do?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rate_sample_method", CTLFLAG_RW, &rack_rate_sample_method , USE_RTT_LOW, "What method should we use for rate sampling 0=high, 1=low "); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "data_after_close", CTLFLAG_RW, &rack_ignore_data_after_close, 0, "Do we hold off sending a RST until all pending data is ack'd"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlpmethod", CTLFLAG_RW, &rack_tlp_threshold_use, TLP_USE_TWO_ONE, "What method do we do for TLP time calc 0=no-de-ack-comp, 1=ID, 2=2.1, 3=2.2"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "min_pace_time", CTLFLAG_RW, &rack_min_pace_time, 0, "Should we enforce a minimum pace time of 1ms"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "min_pace_segs", CTLFLAG_RW, &rack_min_pace_time_seg_req, 6, "How many segments have to be in the len to enforce min-pace-time"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "idle_reduce_high", CTLFLAG_RW, &rack_reduce_largest_on_idle, 0, "Should we reduce the largest cwnd seen to IW on idle reduction"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "bb_verbose", CTLFLAG_RW, &rack_verbose_logging, 0, "Should RACK black box logging be verbose"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sackfiltering", CTLFLAG_RW, &rack_use_sack_filter, 1, "Do we use sack filtering?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "delayed_ack", CTLFLAG_RW, &rack_delayed_ack_time, 200, "Delayed ack time (200ms)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlpminto", CTLFLAG_RW, &rack_tlp_min, 10, "TLP minimum timeout per the specification (10ms)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "precache", CTLFLAG_RW, &rack_precache, 0, "Where should we precache the mcopy (0 is not at all)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sblklimit", CTLFLAG_RW, &rack_sack_block_limit, 128, "When do we start paying attention to small sack blocks"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "send_oldest", CTLFLAG_RW, &rack_always_send_oldest, 1, "Should we always send the oldest TLP and RACK-TLP"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rack_tlp_in_recovery", CTLFLAG_RW, &rack_tlp_in_recovery, 1, "Can we do a TLP during recovery?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rack_tlimit", CTLFLAG_RW, &rack_limited_retran, 0, "How many times can a rack timeout drive out sends"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "minrto", CTLFLAG_RW, &rack_rto_min, 0, "Minimum RTO in ms -- set with caution below 1000 due to TLP"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "maxrto", CTLFLAG_RW, &rack_rto_max, 0, "Maxiumum RTO in ms -- should be at least as large as min_rto"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retry", CTLFLAG_RW, &rack_tlp_max_resend, 2, "How many times does 
TLP retry a single segment or multiple with no ACK"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "recovery_loss_prop", CTLFLAG_RW, &rack_use_proportional_reduce, 0, "Should we proportionaly reduce cwnd based on the number of losses "); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "recovery_prop", CTLFLAG_RW, &rack_proportional_rate, 10, "What percent reduction per loss"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_cwnd_flag", CTLFLAG_RW, &rack_lower_cwnd_at_tlp, 0, "When a TLP completes a retran should we enter recovery?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hptsi_reduces", CTLFLAG_RW, &rack_slot_reduction, 4, "When setting a slot should we reduce by divisor"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hptsi_every_seg", CTLFLAG_RW, &rack_pace_every_seg, 1, "Should we pace out every segment hptsi"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hptsi_seg_max", CTLFLAG_RW, &rack_hptsi_segments, 6, "Should we pace out only a limited size of segments"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prr_sendalot", CTLFLAG_RW, &rack_send_a_lot_in_prr, 1, "Send a lot in prr"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "minto", CTLFLAG_RW, &rack_min_to, 1, "Minimum rack timeout in milliseconds"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "earlyrecoveryseg", CTLFLAG_RW, &rack_early_recovery_max_seg, 6, "Max segments in early recovery"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "earlyrecovery", CTLFLAG_RW, &rack_early_recovery, 1, "Do we do early recovery with rack"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "reorder_thresh", CTLFLAG_RW, &rack_reorder_thresh, 2, "What factor for rack will be added when seeing reordering (shift right)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rtt_tlp_thresh", CTLFLAG_RW, &rack_tlp_thresh, 1, "what divisor for TLP rtt/retran will be added (1=rtt, 2=1/2 rtt etc)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "reorder_fade", CTLFLAG_RW, &rack_reorder_fade, 0, "Does reorder detection fade, if so how many ms (0 means never)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "pktdelay", CTLFLAG_RW, &rack_pkt_delay, 1, "Extra RACK time (in ms) besides reordering thresh"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "inc_var", CTLFLAG_RW, &rack_inc_var, 0, "Should rack add to the TLP timer the variance in rtt calculation"); rack_badfr = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "badfr", CTLFLAG_RD, &rack_badfr, "Total number of bad FRs"); rack_badfr_bytes = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "badfr_bytes", CTLFLAG_RD, &rack_badfr_bytes, "Total number of bad FRs"); rack_rtm_prr_retran = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prrsndret", CTLFLAG_RD, &rack_rtm_prr_retran, "Total number of prr based retransmits"); rack_rtm_prr_newdata = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, 
SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prrsndnew", CTLFLAG_RD, &rack_rtm_prr_newdata, "Total number of prr based new transmits"); rack_timestamp_mismatch = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tsnf", CTLFLAG_RD, &rack_timestamp_mismatch, "Total number of timestamps that we could not find the reported ts"); rack_find_high = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "findhigh", CTLFLAG_RD, &rack_find_high, "Total number of FIN causing find-high"); rack_reorder_seen = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "reordering", CTLFLAG_RD, &rack_reorder_seen, "Total number of times we added delay due to reordering"); rack_tlp_tot = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_to_total", CTLFLAG_RD, &rack_tlp_tot, "Total number of tail loss probe expirations"); rack_tlp_newdata = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_new", CTLFLAG_RD, &rack_tlp_newdata, "Total number of tail loss probe sending new data"); rack_tlp_retran = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retran", CTLFLAG_RD, &rack_tlp_retran, "Total number of tail loss probe sending retransmitted data"); rack_tlp_retran_bytes = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retran_bytes", CTLFLAG_RD, &rack_tlp_retran_bytes, "Total bytes of tail loss probe sending retransmitted data"); rack_tlp_retran_fail = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retran_fail", CTLFLAG_RD, &rack_tlp_retran_fail, "Total number of tail loss probe sending retransmitted data that failed (wait for t3)"); rack_to_tot = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rack_to_tot", CTLFLAG_RD, &rack_to_tot, "Total number of times the rack to expired?"); rack_to_arm_rack = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "arm_rack", CTLFLAG_RD, &rack_to_arm_rack, "Total number of times the rack timer armed?"); rack_to_arm_tlp = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "arm_tlp", CTLFLAG_RD, &rack_to_arm_tlp, "Total number of times the tlp timer armed?"); rack_paced_segments = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "paced", CTLFLAG_RD, &rack_paced_segments, "Total number of times a segment send caused hptsi"); rack_unpaced_segments = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "unpaced", CTLFLAG_RD, &rack_unpaced_segments, "Total number of times a segment did not cause hptsi"); rack_saw_enobuf = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "saw_enobufs", CTLFLAG_RD, &rack_saw_enobuf, "Total number of times a segment did not cause hptsi"); rack_saw_enetunreach = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, 
SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "saw_enetunreach", CTLFLAG_RD, &rack_saw_enetunreach, "Total number of times a segment did not cause hptsi"); rack_to_alloc = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "allocs", CTLFLAG_RD, &rack_to_alloc, "Total allocations of tracking structures"); rack_to_alloc_hard = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "allochard", CTLFLAG_RD, &rack_to_alloc_hard, "Total allocations done with sleeping the hard way"); rack_to_alloc_emerg = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "allocemerg", CTLFLAG_RD, &rack_to_alloc_emerg, "Total allocations done from emergency cache"); rack_to_alloc_limited = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "alloc_limited", CTLFLAG_RD, &rack_to_alloc_limited, "Total allocations dropped due to limit"); rack_alloc_limited_conns = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "alloc_limited_conns", CTLFLAG_RD, &rack_alloc_limited_conns, "Connections with allocations dropped due to limit"); rack_split_limited = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "split_limited", CTLFLAG_RD, &rack_split_limited, "Split allocations dropped due to limit"); rack_sack_proc_all = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sack_long", CTLFLAG_RD, &rack_sack_proc_all, "Total times we had to walk whole list for sack processing"); rack_sack_proc_restart = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sack_restart", CTLFLAG_RD, &rack_sack_proc_restart, "Total times we had to walk whole list due to a restart"); rack_sack_proc_short = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sack_short", CTLFLAG_RD, &rack_sack_proc_short, "Total times we took shortcut for sack processing"); rack_enter_tlp_calc = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_calc_entered", CTLFLAG_RD, &rack_enter_tlp_calc, "Total times we called calc-tlp"); rack_used_tlpmethod = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hit_tlp_method", CTLFLAG_RD, &rack_used_tlpmethod, "Total number of runt sacks"); rack_used_tlpmethod2 = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hit_tlp_method2", CTLFLAG_RD, &rack_used_tlpmethod2, "Total number of runt sacks 2"); rack_runt_sacks = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "runtsacks", CTLFLAG_RD, &rack_runt_sacks, "Total number of runt sacks"); rack_progress_drops = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prog_drops", CTLFLAG_RD, &rack_progress_drops, "Total number of progress drops"); rack_input_idle_reduces = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, 
"idle_reduce_oninput", CTLFLAG_RD, &rack_input_idle_reduces, "Total number of idle reductions on input"); rack_tlp_does_nada = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_nada", CTLFLAG_RD, &rack_tlp_does_nada, "Total number of nada tlp calls"); COUNTER_ARRAY_ALLOC(rack_out_size, TCP_MSS_ACCT_SIZE, M_WAITOK); SYSCTL_ADD_COUNTER_U64_ARRAY(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "outsize", CTLFLAG_RD, rack_out_size, TCP_MSS_ACCT_SIZE, "MSS send sizes"); COUNTER_ARRAY_ALLOC(rack_opts_arry, RACK_OPTS_SIZE, M_WAITOK); SYSCTL_ADD_COUNTER_U64_ARRAY(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "opts", CTLFLAG_RD, rack_opts_arry, RACK_OPTS_SIZE, "RACK Option Stats"); SYSCTL_ADD_PROC(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "clear", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, &rack_clear_counter, 0, sysctl_rack_clear, "IU", "Clear counters"); } static inline int32_t rack_progress_timeout_check(struct tcpcb *tp) { #ifdef NETFLIX_PROGRESS if (tp->t_maxunacktime && tp->t_acktime && TSTMP_GT(ticks, tp->t_acktime)) { if ((ticks - tp->t_acktime) >= tp->t_maxunacktime) { /* * There is an assumption that the caller * will drop the connection so we will * increment the counters here. */ struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; counter_u64_add(rack_progress_drops, 1); TCPSTAT_INC(tcps_progdrops); rack_log_progress_event(rack, tp, ticks, PROGRESS_DROP, __LINE__); return (1); } } #endif return (0); } static void rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.flex1 = TICKS_2_MSEC(rack->rc_tp->t_srtt >> TCP_RTT_SHIFT); log.u_bbr.flex2 = to; log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex4 = slot; log.u_bbr.flex5 = rack->rc_inp->inp_hptsslot; log.u_bbr.flex6 = rack->rc_tp->t_rxtcur; log.u_bbr.flex8 = which; log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_TIMERSTAR, 0, 0, &log, false); } } static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex8 = to_num; log.u_bbr.flex1 = rack->r_ctl.rc_rack_min_rtt; log.u_bbr.flex2 = rack->rc_rack_rtt; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_RTO, 0, 0, &log, false); } } static void rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, int32_t t, uint32_t o_srtt, uint32_t o_var) { if (tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = t; log.u_bbr.flex2 = o_srtt; log.u_bbr.flex3 = o_var; log.u_bbr.flex4 = rack->r_ctl.rack_rs.rs_rtt_lowest; log.u_bbr.flex5 = rack->r_ctl.rack_rs.rs_rtt_highest; log.u_bbr.flex6 = rack->r_ctl.rack_rs.rs_rtt_cnt; log.u_bbr.rttProp = rack->r_ctl.rack_rs.rs_rtt_tot; log.u_bbr.flex8 = rack->r_ctl.rc_rate_sample_method; TCP_LOG_EVENT(tp, NULL, 
&rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_BBRRTT, 0, 0, &log, false); } } static void rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt) { /* * Log the rtt sample we are * applying to the srtt algorithm in * useconds. */ if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; struct timeval tv; memset(&log, 0, sizeof(log)); /* Convert our ms to a microsecond */ log.u_bbr.flex1 = rtt * 1000; log.u_bbr.timeStamp = tcp_get_usecs(&tv); TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, TCP_LOG_RTT, 0, 0, &log, false, &tv); } } static inline void rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, int event, int line) { if (rack_verbose_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = line; log.u_bbr.flex2 = tick; log.u_bbr.flex3 = tp->t_maxunacktime; log.u_bbr.flex4 = tp->t_acktime; log.u_bbr.flex8 = event; TCP_LOG_EVENT(tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_PROGRESS, 0, 0, &log, false); } } static void rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = slot; log.u_bbr.flex7 = (0x0000ffff & rack->r_ctl.rc_hpts_flags); log.u_bbr.flex8 = rack->rc_in_persist; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_BBRSND, 0, 0, &log, false); } } static void rack_log_doseg_done(struct tcp_rack *rack, uint32_t cts, int32_t nxt_pkt, int32_t did_out, int way_out) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = did_out; log.u_bbr.flex2 = nxt_pkt; log.u_bbr.flex3 = way_out; log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex7 = rack->r_wanted_output; log.u_bbr.flex8 = rack->rc_in_persist; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_DOSEG_DONE, 0, 0, &log, false); } } static void rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t slot, uint8_t hpts_calling) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = slot; log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex7 = hpts_calling; log.u_bbr.flex8 = rack->rc_in_persist; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_JUSTRET, 0, tlen, &log, false); } } static void rack_log_to_cancel(struct tcp_rack *rack, int32_t hpts_removed, int line) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = line; log.u_bbr.flex2 = 0; log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex4 = 0; log.u_bbr.flex6 = 
rack->rc_tp->t_rxtcur; log.u_bbr.flex8 = hpts_removed; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_TIMERCANC, 0, 0, &log, false); } } static void rack_log_to_processing(struct tcp_rack *rack, uint32_t cts, int32_t ret, int32_t timers) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.flex1 = timers; log.u_bbr.flex2 = ret; log.u_bbr.flex3 = rack->r_ctl.rc_timer_exp; log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex5 = cts; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_TO_PROCESS, 0, 0, &log, false); } } static void rack_counter_destroy() { counter_u64_free(rack_badfr); counter_u64_free(rack_badfr_bytes); counter_u64_free(rack_rtm_prr_retran); counter_u64_free(rack_rtm_prr_newdata); counter_u64_free(rack_timestamp_mismatch); counter_u64_free(rack_reorder_seen); counter_u64_free(rack_tlp_tot); counter_u64_free(rack_tlp_newdata); counter_u64_free(rack_tlp_retran); counter_u64_free(rack_tlp_retran_bytes); counter_u64_free(rack_tlp_retran_fail); counter_u64_free(rack_to_tot); counter_u64_free(rack_to_arm_rack); counter_u64_free(rack_to_arm_tlp); counter_u64_free(rack_paced_segments); counter_u64_free(rack_unpaced_segments); counter_u64_free(rack_saw_enobuf); counter_u64_free(rack_saw_enetunreach); counter_u64_free(rack_to_alloc_hard); counter_u64_free(rack_to_alloc_emerg); counter_u64_free(rack_sack_proc_all); counter_u64_free(rack_sack_proc_short); counter_u64_free(rack_sack_proc_restart); counter_u64_free(rack_to_alloc); counter_u64_free(rack_to_alloc_limited); counter_u64_free(rack_split_limited); counter_u64_free(rack_find_high); counter_u64_free(rack_runt_sacks); counter_u64_free(rack_enter_tlp_calc); counter_u64_free(rack_used_tlpmethod); counter_u64_free(rack_used_tlpmethod2); counter_u64_free(rack_progress_drops); counter_u64_free(rack_input_idle_reduces); counter_u64_free(rack_tlp_does_nada); COUNTER_ARRAY_FREE(rack_out_size, TCP_MSS_ACCT_SIZE); COUNTER_ARRAY_FREE(rack_opts_arry, RACK_OPTS_SIZE); } static struct rack_sendmap * rack_alloc(struct tcp_rack *rack) { struct rack_sendmap *rsm; rsm = uma_zalloc(rack_zone, M_NOWAIT); if (rsm) { rack->r_ctl.rc_num_maps_alloced++; counter_u64_add(rack_to_alloc, 1); return (rsm); } if (rack->rc_free_cnt) { counter_u64_add(rack_to_alloc_emerg, 1); rsm = TAILQ_FIRST(&rack->r_ctl.rc_free); TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_next); rack->rc_free_cnt--; return (rsm); } return (NULL); } static struct rack_sendmap * rack_alloc_full_limit(struct tcp_rack *rack) { if ((rack_map_entries_limit > 0) && (rack->r_ctl.rc_num_maps_alloced >= rack_map_entries_limit)) { counter_u64_add(rack_to_alloc_limited, 1); if (!rack->alloc_limit_reported) { rack->alloc_limit_reported = 1; counter_u64_add(rack_alloc_limited_conns, 1); } return (NULL); } return (rack_alloc(rack)); } /* wrapper to allocate a sendmap entry, subject to a specific limit */ static struct rack_sendmap * rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type) { struct rack_sendmap *rsm; if (limit_type) { /* currently there is only one limit type */ if (rack_map_split_limit > 0 && rack->r_ctl.rc_num_split_allocs >= rack_map_split_limit) { counter_u64_add(rack_split_limited, 1); if (!rack->alloc_limit_reported) { rack->alloc_limit_reported = 1; counter_u64_add(rack_alloc_limited_conns, 1); } return (NULL); } } /* allocate and mark in the limit type, if set */ rsm = 
rack_alloc(rack); if (rsm != NULL && limit_type) { rsm->r_limit_type = limit_type; rack->r_ctl.rc_num_split_allocs++; } return (rsm); } static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm) { if (rsm->r_limit_type) { /* currently there is only one limit type */ rack->r_ctl.rc_num_split_allocs--; } if (rack->r_ctl.rc_tlpsend == rsm) rack->r_ctl.rc_tlpsend = NULL; if (rack->r_ctl.rc_next == rsm) rack->r_ctl.rc_next = NULL; if (rack->r_ctl.rc_sacklast == rsm) rack->r_ctl.rc_sacklast = NULL; if (rack->rc_free_cnt < rack_free_cache) { memset(rsm, 0, sizeof(struct rack_sendmap)); TAILQ_INSERT_TAIL(&rack->r_ctl.rc_free, rsm, r_next); rsm->r_limit_type = 0; rack->rc_free_cnt++; return; } rack->r_ctl.rc_num_maps_alloced--; uma_zfree(rack_zone, rsm); } /* * CC wrapper hook functions */ static void rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery) { #ifdef NETFLIX_STATS int32_t gput; #endif INP_WLOCK_ASSERT(tp->t_inpcb); tp->ccv->nsegs = nsegs; tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); if ((recovery) && (rack->r_ctl.rc_early_recovery_segs)) { uint32_t max; max = rack->r_ctl.rc_early_recovery_segs * tp->t_maxseg; if (tp->ccv->bytes_this_ack > max) { tp->ccv->bytes_this_ack = max; } } if (tp->snd_cwnd <= tp->snd_wnd) tp->ccv->flags |= CCF_CWND_LIMITED; else tp->ccv->flags &= ~CCF_CWND_LIMITED; if (type == CC_ACK) { #ifdef NETFLIX_STATS stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF, ((int32_t) tp->snd_cwnd) - tp->snd_wnd); if ((tp->t_flags & TF_GPUTINPROG) && SEQ_GEQ(th->th_ack, tp->gput_ack)) { gput = (((int64_t) (th->th_ack - tp->gput_seq)) << 3) / max(1, tcp_ts_getticks() - tp->gput_ts); stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT, gput); /* * XXXLAS: This is a temporary hack, and should be * chained off VOI_TCP_GPUT when stats(9) grows an * API to deal with chained VOIs. */ if (tp->t_stats_gput_prev > 0) stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_GPUT_ND, ((gput - tp->t_stats_gput_prev) * 100) / tp->t_stats_gput_prev); tp->t_flags &= ~TF_GPUTINPROG; tp->t_stats_gput_prev = gput; if (tp->t_maxpeakrate) { /* * We update t_peakrate_thr. This gives us roughly * one update per round trip time. */ tcp_update_peakrate_thr(tp); } } #endif if (tp->snd_cwnd > tp->snd_ssthresh) { tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, nsegs * V_tcp_abc_l_var * tp->t_maxseg); if (tp->t_bytes_acked >= tp->snd_cwnd) { tp->t_bytes_acked -= tp->snd_cwnd; tp->ccv->flags |= CCF_ABC_SENTAWND; } } else { tp->ccv->flags &= ~CCF_ABC_SENTAWND; tp->t_bytes_acked = 0; } } if (CC_ALGO(tp)->ack_received != NULL) { /* XXXLAS: Find a way to live without this */ tp->ccv->curack = th->th_ack; CC_ALGO(tp)->ack_received(tp->ccv, type); } #ifdef NETFLIX_STATS stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd); #endif if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) { rack->r_ctl.rc_rack_largest_cwnd = tp->snd_cwnd; } /* we enforce max peak rate if it is set. 
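* (t_peakrate_thr is refreshed above via tcp_update_peakrate_thr() when t_maxpeakrate is set.)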
*/ if (tp->t_peakrate_thr && tp->snd_cwnd > tp->t_peakrate_thr) { tp->snd_cwnd = tp->t_peakrate_thr; } } static void tcp_rack_partialack(struct tcpcb *tp, struct tcphdr *th) { struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; INP_WLOCK_ASSERT(tp->t_inpcb); if (rack->r_ctl.rc_prr_sndcnt > 0) rack->r_wanted_output++; } static void rack_post_recovery(struct tcpcb *tp, struct tcphdr *th) { struct tcp_rack *rack; INP_WLOCK_ASSERT(tp->t_inpcb); rack = (struct tcp_rack *)tp->t_fb_ptr; if (CC_ALGO(tp)->post_recovery != NULL) { tp->ccv->curack = th->th_ack; CC_ALGO(tp)->post_recovery(tp->ccv); } /* * Here we can in theory adjust cwnd to be based on the number of * losses in the window (rack->r_ctl.rc_loss_count). This is done * based on the rack_use_proportional flag. */ if (rack->r_ctl.rc_prop_reduce && rack->r_ctl.rc_prop_rate) { int32_t reduce; reduce = (rack->r_ctl.rc_loss_count * rack->r_ctl.rc_prop_rate); if (reduce > 50) { reduce = 50; } tp->snd_cwnd -= ((reduce * tp->snd_cwnd) / 100); } else { if (tp->snd_cwnd > tp->snd_ssthresh) { /* Drop us down to the ssthresh (1/2 cwnd at loss) */ tp->snd_cwnd = tp->snd_ssthresh; } } if (rack->r_ctl.rc_prr_sndcnt > 0) { /* Suck the next prr cnt back into cwnd */ tp->snd_cwnd += rack->r_ctl.rc_prr_sndcnt; rack->r_ctl.rc_prr_sndcnt = 0; } tp->snd_recover = tp->snd_una; EXIT_RECOVERY(tp->t_flags); } static void rack_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) { struct tcp_rack *rack; INP_WLOCK_ASSERT(tp->t_inpcb); rack = (struct tcp_rack *)tp->t_fb_ptr; switch (type) { case CC_NDUPACK: /* rack->r_ctl.rc_ssthresh_set = 1;*/ if (!IN_FASTRECOVERY(tp->t_flags)) { rack->r_ctl.rc_tlp_rtx_out = 0; rack->r_ctl.rc_prr_delivered = 0; rack->r_ctl.rc_prr_out = 0; rack->r_ctl.rc_loss_count = 0; rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una; tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case CC_ECN: if (!IN_CONGRECOVERY(tp->t_flags)) { TCPSTAT_INC(tcps_ecn_rcwnd); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case CC_RTO: tp->t_dupacks = 0; tp->t_bytes_acked = 0; EXIT_RECOVERY(tp->t_flags); tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg) * tp->t_maxseg; tp->snd_cwnd = tp->t_maxseg; break; case CC_RTO_ERR: TCPSTAT_INC(tcps_sndrexmitbad); /* RTO was unnecessary, so reset everything. 
*/ tp->snd_cwnd = tp->snd_cwnd_prev; tp->snd_ssthresh = tp->snd_ssthresh_prev; tp->snd_recover = tp->snd_recover_prev; if (tp->t_flags & TF_WASFRECOVERY) ENTER_FASTRECOVERY(tp->t_flags); if (tp->t_flags & TF_WASCRECOVERY) ENTER_CONGRECOVERY(tp->t_flags); tp->snd_nxt = tp->snd_max; tp->t_badrxtwin = 0; break; } if (CC_ALGO(tp)->cong_signal != NULL) { if (th != NULL) tp->ccv->curack = th->th_ack; CC_ALGO(tp)->cong_signal(tp->ccv, type); } } static inline void rack_cc_after_idle(struct tcpcb *tp, int reduce_largest) { uint32_t i_cwnd; INP_WLOCK_ASSERT(tp->t_inpcb); #ifdef NETFLIX_STATS TCPSTAT_INC(tcps_idle_restarts); if (tp->t_state == TCPS_ESTABLISHED) TCPSTAT_INC(tcps_idle_estrestarts); #endif if (CC_ALGO(tp)->after_idle != NULL) CC_ALGO(tp)->after_idle(tp->ccv); if (V_tcp_initcwnd_segments) i_cwnd = min((V_tcp_initcwnd_segments * tp->t_maxseg), max(2 * tp->t_maxseg, 14600)); else if (V_tcp_do_rfc3390) i_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380)); else { /* Per RFC5681 Section 3.1 */ if (tp->t_maxseg > 2190) i_cwnd = 2 * tp->t_maxseg; else if (tp->t_maxseg > 1095) i_cwnd = 3 * tp->t_maxseg; else i_cwnd = 4 * tp->t_maxseg; } if (reduce_largest) { /* * Do we reduce the largest cwnd to make * rack play nice on restart hptsi wise? */ if (((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rack_largest_cwnd > i_cwnd) ((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rack_largest_cwnd = i_cwnd; } /* * Being idle is no differnt than the initial window. If the cc * clamps it down below the initial window raise it to the initial * window. */ if (tp->snd_cwnd < i_cwnd) { tp->snd_cwnd = i_cwnd; } } /* * Indicate whether this ack should be delayed. We can delay the ack if * following conditions are met: * - There is no delayed ack timer in progress. * - Our last ack wasn't a 0-sized window. We never want to delay * the ack that opens up a 0-sized window. * - LRO wasn't used for this segment. We make sure by checking that the * segment size is not larger than the MSS. * - Delayed acks are enabled or this is a half-synchronized T/TCP * connection. */ #define DELAY_ACK(tp, tlen) \ (((tp->t_flags & TF_RXWIN0SENT) == 0) && \ ((tp->t_flags & TF_DELACK) == 0) && \ (tlen <= tp->t_maxseg) && \ (tp->t_delayed_ack || (tp->t_flags & TF_NEEDSYN))) static inline void rack_calc_rwin(struct socket *so, struct tcpcb *tp) { int32_t win; /* * Calculate amount of space in receive window, and then do TCP * input processing. Receive window is amount of space in rcv queue, * but not less than advertised window. */ win = sbspace(&so->so_rcv); if (win < 0) win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); } static void rack_do_drop(struct mbuf *m, struct tcpcb *tp) { /* * Drop space held by incoming segment and return. */ if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); if (m) m_freem(m); } static void rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t rstreason, int32_t tlen) { if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); } /* * The value in ret_val informs the caller * if we dropped the tcb (and lock) or not. * 1 = we dropped it, 0 = the TCB is still locked * and valid. */ static void rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val) { /* * Generate an ACK dropping incoming segment if it occupies sequence * space, where the ACK reflects our state. 
* * We can now skip the test for the RST flag since all paths to this * code happen after packets containing RST have been dropped. * * In the SYN-RECEIVED state, don't send an ACK unless the segment * we received passes the SYN-RECEIVED ACK test. If it fails send a * RST. This breaks the loop in the "LAND" DoS attack, and also * prevents an ACK storm between two listening ports that have been * sent forged SYN segments, each with the source address of the * other. */ struct tcp_rack *rack; if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max))) { *ret_val = 1; rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return; } else *ret_val = 0; rack = (struct tcp_rack *)tp->t_fb_ptr; rack->r_wanted_output++; tp->t_flags |= TF_ACKNOW; if (m) m_freem(m); } static int rack_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp) { /* * RFC5961 Section 3.2 * * - RST drops connection only if SEG.SEQ == RCV.NXT. - If RST is in * window, we send challenge ACK. * * Note: to take into account delayed ACKs, we should test against * last_ack_sent instead of rcv_nxt. Note 2: we handle special case * of closed window, not covered by the RFC. */ int dropped = 0; if ((SEQ_GEQ(th->th_seq, (tp->last_ack_sent - 1)) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); if (V_tcp_insecure_rst || (tp->last_ack_sent == th->th_seq) || (tp->rcv_nxt == th->th_seq) || ((tp->last_ack_sent - 1) == th->th_seq)) { TCPSTAT_INC(tcps_drops); /* Drop the connection. */ switch (tp->t_state) { case TCPS_SYN_RECEIVED: so->so_error = ECONNREFUSED; goto close; case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: so->so_error = ECONNRESET; close: tcp_state_change(tp, TCPS_CLOSED); /* FALLTHROUGH */ default: tp = tcp_close(tp); } dropped = 1; rack_do_drop(m, tp); } else { TCPSTAT_INC(tcps_badrst); /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; } } else { m_freem(m); } return (dropped); } /* * The value in ret_val informs the caller * if we dropped the tcb (and lock) or not. * 1 = we dropped it, 0 = the TCB is still locked * and valid. */ static void rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn && SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { tp = tcp_drop(tp, ECONNRESET); *ret_val = 1; rack_do_drop(m, tp); } else { /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; *ret_val = 0; rack_do_drop(m, NULL); } } /* * rack_ts_check returns 1 for you should not proceed. It places * in ret_val what should be returned 1/0 by the caller. The 1 indicates * that the TCB is unlocked and probably dropped. The 0 indicates the * TCB is still valid and locked. */ static int rack_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t tlen, int32_t thflags, int32_t * ret_val) { /* Check to see if ts_recent is over 24 days old. 
*/ if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { /* * Invalidate ts_recent. If this segment updates ts_recent, * the age will be reset later and ts_recent will get a * valid value. If it does not, setting ts_recent to zero * will at least satisfy the requirement that zero be placed * in the timestamp echo reply when ts_recent isn't valid. * The age isn't reset until we get a valid ts_recent * because we don't want out-of-order segments to be dropped * when ts_recent is old. */ tp->ts_recent = 0; } else { TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, tlen); TCPSTAT_INC(tcps_pawsdrop); *ret_val = 0; if (tlen) { rack_do_dropafterack(m, tp, th, thflags, tlen, ret_val); } else { rack_do_drop(m, NULL); } return (1); } return (0); } /* * rack_drop_checks returns 1 for you should not proceed. It places * in ret_val what should be returned 1/0 by the caller. The 1 indicates * that the TCB is unlocked and probably dropped. The 0 indicates the * TCB is still valid and locked. */ static int rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val) { int32_t todrop; int32_t thflags; int32_t tlen; thflags = *thf; tlen = *tlenp; todrop = tp->rcv_nxt - th->th_seq; if (todrop > 0) { if (thflags & TH_SYN) { thflags &= ~TH_SYN; th->th_seq++; if (th->th_urp > 1) th->th_urp--; else thflags &= ~TH_URG; todrop--; } /* * Following if statement from Stevens, vol. 2, p. 960. */ if (todrop > tlen || (todrop == tlen && (thflags & TH_FIN) == 0)) { /* * Any valid FIN must be to the left of the window. * At this point the FIN must be a duplicate or out * of sequence; drop it. */ thflags &= ~TH_FIN; /* * Send an ACK to resynchronize and drop any data. * But keep on processing for RST or ACK. */ tp->t_flags |= TF_ACKNOW; todrop = tlen; TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, todrop); } else { TCPSTAT_INC(tcps_rcvpartduppack); TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); } if (tp->t_flags & TF_SACK_PERMIT) { /* * record the left, to-be-dropped edge of data * here, for use as dsack block further down */ tcp_update_sack_list(tp, th->th_seq, th->th_seq + todrop); /* * ACK now, as the next in-sequence segment * will clear the DSACK block again */ tp->t_flags |= TF_ACKNOW; } *drop_hdrlen += todrop; /* drop from the top afterwards */ th->th_seq += todrop; tlen -= todrop; if (th->th_urp > todrop) th->th_urp -= todrop; else { thflags &= ~TH_URG; th->th_urp = 0; } } /* * If segment ends after window, drop trailing data (and PUSH and * FIN); if nothing left, just ACK. */ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); if (todrop > 0) { TCPSTAT_INC(tcps_rcvpackafterwin); if (todrop >= tlen) { TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen); /* * If window is closed can only take segments at * window edge, and have to drop data and PUSH from * incoming segments. Continue processing, but * remember to ack. Otherwise, drop segment and * ack. */ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_rcvwinprobe); } else { rack_do_dropafterack(m, tp, th, thflags, tlen, ret_val); return (1); } } else TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); m_adj(m, -todrop); tlen -= todrop; thflags &= ~(TH_PUSH | TH_FIN); } *thf = thflags; *tlenp = tlen; return (0); } static struct rack_sendmap * rack_find_lowest_rsm(struct tcp_rack *rack) { struct rack_sendmap *rsm; /* * Walk the time-order transmitted list looking for an rsm that is * not acked. 
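 * rc_tmap is kept in (re)transmission-time order, so the scan can stop at the first entry
 * that is not marked RACK_ACKED.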
This will be the one that was sent the longest time * ago that is still outstanding. */ TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) { if (rsm->r_flags & RACK_ACKED) { continue; } goto finish; } finish: return (rsm); } static struct rack_sendmap * rack_find_high_nonack(struct tcp_rack *rack, struct rack_sendmap *rsm) { struct rack_sendmap *prsm; /* * Walk the sequence order list backward until we hit and arrive at * the highest seq not acked. In theory when this is called it * should be the last segment (which it was not). */ counter_u64_add(rack_find_high, 1); prsm = rsm; TAILQ_FOREACH_REVERSE_FROM(prsm, &rack->r_ctl.rc_map, rack_head, r_next) { if (prsm->r_flags & (RACK_ACKED | RACK_HAS_FIN)) { continue; } return (prsm); } return (NULL); } static uint32_t rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts) { int32_t lro; uint32_t thresh; /* * lro is the flag we use to determine if we have seen reordering. * If it gets set we have seen reordering. The reorder logic either * works in one of two ways: * * If reorder-fade is configured, then we track the last time we saw * re-ordering occur. If we reach the point where enough time as * passed we no longer consider reordering has occuring. * * Or if reorder-face is 0, then once we see reordering we consider * the connection to alway be subject to reordering and just set lro * to 1. * * In the end if lro is non-zero we add the extra time for * reordering in. */ if (srtt == 0) srtt = 1; if (rack->r_ctl.rc_reorder_ts) { if (rack->r_ctl.rc_reorder_fade) { if (SEQ_GEQ(cts, rack->r_ctl.rc_reorder_ts)) { lro = cts - rack->r_ctl.rc_reorder_ts; if (lro == 0) { /* * No time as passed since the last * reorder, mark it as reordering. */ lro = 1; } } else { /* Negative time? */ lro = 0; } if (lro > rack->r_ctl.rc_reorder_fade) { /* Turn off reordering seen too */ rack->r_ctl.rc_reorder_ts = 0; lro = 0; } } else { /* Reodering does not fade */ lro = 1; } } else { lro = 0; } thresh = srtt + rack->r_ctl.rc_pkt_delay; if (lro) { /* It must be set, if not you get 1/4 rtt */ if (rack->r_ctl.rc_reorder_shift) thresh += (srtt >> rack->r_ctl.rc_reorder_shift); else thresh += (srtt >> 2); } else { thresh += 1; } /* We don't let the rack timeout be above a RTO */ if (thresh > TICKS_2_MSEC(rack->rc_tp->t_rxtcur)) { thresh = TICKS_2_MSEC(rack->rc_tp->t_rxtcur); } /* And we don't want it above the RTO max either */ if (thresh > rack_rto_max) { thresh = rack_rto_max; } return (thresh); } static uint32_t rack_calc_thresh_tlp(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t srtt) { struct rack_sendmap *prsm; uint32_t thresh, len; int maxseg; if (srtt == 0) srtt = 1; if (rack->r_ctl.rc_tlp_threshold) thresh = srtt + (srtt / rack->r_ctl.rc_tlp_threshold); else thresh = (srtt * 2); /* Get the previous sent packet, if any */ maxseg = tcp_maxseg(tp); counter_u64_add(rack_enter_tlp_calc, 1); len = rsm->r_end - rsm->r_start; if (rack->rack_tlp_threshold_use == TLP_USE_ID) { /* Exactly like the ID */ if (((tp->snd_max - tp->snd_una) - rack->r_ctl.rc_sacked + rack->r_ctl.rc_holes_rxt) <= maxseg) { uint32_t alt_thresh; /* * Compensate for delayed-ack with the d-ack time. 
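 * Illustrative only (numbers assumed, not defaults): with srtt = 40 ms and
 * rack_delayed_ack_time = 200 ms, alt_thresh = 40 + 20 + 200 = 260 ms, replacing a base
 * threshold of 2 * srtt = 80 ms (when rc_tlp_threshold is unset) so a single outstanding
 * segment is not probed before the peer's delayed ACK could plausibly arrive.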
*/ counter_u64_add(rack_used_tlpmethod, 1); alt_thresh = srtt + (srtt / 2) + rack_delayed_ack_time; if (alt_thresh > thresh) thresh = alt_thresh; } } else if (rack->rack_tlp_threshold_use == TLP_USE_TWO_ONE) { /* 2.1 behavior */ prsm = TAILQ_PREV(rsm, rack_head, r_tnext); if (prsm && (len <= maxseg)) { /* * Two packets outstanding, thresh should be (2*srtt) + * possible inter-packet delay (if any). */ uint32_t inter_gap = 0; int idx, nidx; counter_u64_add(rack_used_tlpmethod, 1); idx = rsm->r_rtr_cnt - 1; nidx = prsm->r_rtr_cnt - 1; if (TSTMP_GEQ(rsm->r_tim_lastsent[nidx], prsm->r_tim_lastsent[idx])) { /* Yes it was sent later (or at the same time) */ inter_gap = rsm->r_tim_lastsent[idx] - prsm->r_tim_lastsent[nidx]; } thresh += inter_gap; } else if (len <= maxseg) { /* * Possibly compensate for delayed-ack. */ uint32_t alt_thresh; counter_u64_add(rack_used_tlpmethod2, 1); alt_thresh = srtt + (srtt / 2) + rack_delayed_ack_time; if (alt_thresh > thresh) thresh = alt_thresh; } } else if (rack->rack_tlp_threshold_use == TLP_USE_TWO_TWO) { /* 2.2 behavior */ if (len <= maxseg) { uint32_t alt_thresh; /* * Compensate for delayed-ack with the d-ack time. */ counter_u64_add(rack_used_tlpmethod, 1); alt_thresh = srtt + (srtt / 2) + rack_delayed_ack_time; if (alt_thresh > thresh) thresh = alt_thresh; } } /* Not above an RTO */ if (thresh > TICKS_2_MSEC(tp->t_rxtcur)) { thresh = TICKS_2_MSEC(tp->t_rxtcur); } /* Not above a RTO max */ if (thresh > rack_rto_max) { thresh = rack_rto_max; } /* Apply user supplied min TLP */ if (thresh < rack_tlp_min) { thresh = rack_tlp_min; } return (thresh); } static struct rack_sendmap * rack_check_recovery_mode(struct tcpcb *tp, uint32_t tsused) { /* * Check to see that we don't need to fall into recovery. We will * need to do so if our oldest transmit is past the time we should * have had an ack. */ struct tcp_rack *rack; struct rack_sendmap *rsm; int32_t idx; uint32_t srtt_cur, srtt, thresh; rack = (struct tcp_rack *)tp->t_fb_ptr; if (TAILQ_EMPTY(&rack->r_ctl.rc_map)) { return (NULL); } srtt_cur = tp->t_srtt >> TCP_RTT_SHIFT; srtt = TICKS_2_MSEC(srtt_cur); if (rack->rc_rack_rtt && (srtt > rack->rc_rack_rtt)) srtt = rack->rc_rack_rtt; rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); if (rsm == NULL) return (NULL); if (rsm->r_flags & RACK_ACKED) { rsm = rack_find_lowest_rsm(rack); if (rsm == NULL) return (NULL); } idx = rsm->r_rtr_cnt - 1; thresh = rack_calc_thresh_rack(rack, srtt, tsused); if (tsused < rsm->r_tim_lastsent[idx]) { return (NULL); } if ((tsused - rsm->r_tim_lastsent[idx]) < thresh) { return (NULL); } /* Ok if we reach here we are over-due */ rack->r_ctl.rc_rsm_start = rsm->r_start; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; rack_cong_signal(tp, NULL, CC_NDUPACK); return (rsm); } static uint32_t rack_get_persists_timer_val(struct tcpcb *tp, struct tcp_rack *rack) { int32_t t; int32_t tt; uint32_t ret_val; t = TICKS_2_MSEC((tp->t_srtt >> TCP_RTT_SHIFT) + ((tp->t_rttvar * 4) >> TCP_RTT_SHIFT)); TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], tcp_persmin, tcp_persmax); if (tp->t_rxtshift < TCP_MAXRXTSHIFT) tp->t_rxtshift++; rack->r_ctl.rc_hpts_flags |= PACE_TMR_PERSIT; ret_val = (uint32_t)tt; return (ret_val); } static uint32_t rack_timer_start(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { /* * Start the FR timer, we do this based on getting the first one in * the rc_tmap. Note that if its NULL we must stop the timer. 
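 * Whatever path is taken, the value handed back is the number of milliseconds until the
 * selected timer (persist, RXT, RACK or TLP) should fire; 0 means no timer needs to run.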
in all * events we need to stop the running timer (if its running) before * starting the new one. */ uint32_t thresh, exp, to, srtt, time_since_sent; uint32_t srtt_cur; int32_t idx; int32_t is_tlp_timer = 0; struct rack_sendmap *rsm; if (rack->t_timers_stopped) { /* All timers have been stopped none are to run */ return (0); } if (rack->rc_in_persist) { /* We can't start any timer in persists */ return (rack_get_persists_timer_val(tp, rack)); } rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); if (rsm == NULL) { /* Nothing on the send map */ activate_rxt: if (SEQ_LT(tp->snd_una, tp->snd_max) || sbavail(&(tp->t_inpcb->inp_socket->so_snd))) { rack->r_ctl.rc_hpts_flags |= PACE_TMR_RXT; to = TICKS_2_MSEC(tp->t_rxtcur); if (to == 0) to = 1; return (to); } return (0); } if (rsm->r_flags & RACK_ACKED) { rsm = rack_find_lowest_rsm(rack); if (rsm == NULL) { /* No lowest? */ goto activate_rxt; } } /* Convert from ms to usecs */ if (rsm->r_flags & RACK_SACK_PASSED) { if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) == 1) && (rsm->r_flags & RACK_HAS_FIN)) { /* * We don't start a rack timer if all we have is a * FIN outstanding. */ goto activate_rxt; } if (tp->t_srtt) { srtt_cur = (tp->t_srtt >> TCP_RTT_SHIFT); srtt = TICKS_2_MSEC(srtt_cur); } else srtt = RACK_INITIAL_RTO; thresh = rack_calc_thresh_rack(rack, srtt, cts); idx = rsm->r_rtr_cnt - 1; exp = rsm->r_tim_lastsent[idx] + thresh; if (SEQ_GEQ(exp, cts)) { to = exp - cts; if (to < rack->r_ctl.rc_min_to) { to = rack->r_ctl.rc_min_to; } } else { to = rack->r_ctl.rc_min_to; } } else { /* Ok we need to do a TLP not RACK */ if ((rack->rc_tlp_in_progress != 0) || (rack->r_ctl.rc_tlp_rtx_out != 0)) { /* * The previous send was a TLP or a tlp_rtx is in * process. */ goto activate_rxt; } if ((tp->snd_max - tp->snd_una) > tp->snd_wnd) { /* * Peer collapsed rwnd, don't do TLP. */ goto activate_rxt; } rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_tmap, rack_sendmap, r_tnext); if (rsm == NULL) { /* We found no rsm to TLP with. */ goto activate_rxt; } if (rsm->r_flags & RACK_HAS_FIN) { /* If its a FIN we dont do TLP */ rsm = NULL; goto activate_rxt; } idx = rsm->r_rtr_cnt - 1; if (TSTMP_GT(cts, rsm->r_tim_lastsent[idx])) time_since_sent = cts - rsm->r_tim_lastsent[idx]; else time_since_sent = 0; is_tlp_timer = 1; if (tp->t_srtt) { srtt_cur = (tp->t_srtt >> TCP_RTT_SHIFT); srtt = TICKS_2_MSEC(srtt_cur); } else srtt = RACK_INITIAL_RTO; thresh = rack_calc_thresh_tlp(tp, rack, rsm, srtt); if (thresh > time_since_sent) to = thresh - time_since_sent; else to = rack->r_ctl.rc_min_to; if (to > TCPTV_REXMTMAX) { /* * If the TLP time works out to larger than the max * RTO lets not do TLP.. just RTO. */ goto activate_rxt; } if (rsm->r_start != rack->r_ctl.rc_last_tlp_seq) { /* * The tail is no longer the last one I did a probe * on */ rack->r_ctl.rc_tlp_seg_send_cnt = 0; rack->r_ctl.rc_last_tlp_seq = rsm->r_start; } } if (is_tlp_timer == 0) { rack->r_ctl.rc_hpts_flags |= PACE_TMR_RACK; } else { if ((rack->r_ctl.rc_tlp_send_cnt > rack_tlp_max_resend) || (rack->r_ctl.rc_tlp_seg_send_cnt > rack_tlp_max_resend)) { /* * We have exceeded how many times we can retran the * current TLP timer, switch to the RTO timer. 
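 * rack_tlp_max_resend bounds how many consecutive probes may target the same data; past
 * that we fall back to the conventional RTO so a repeatedly lost probe cannot stall the
 * connection.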
*/ goto activate_rxt; } else { rack->r_ctl.rc_hpts_flags |= PACE_TMR_TLP; } } if (to == 0) to = 1; return (to); } static void rack_enter_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { if (rack->rc_in_persist == 0) { if (((tp->t_flags & TF_SENTFIN) == 0) && (tp->snd_max - tp->snd_una) >= sbavail(&rack->rc_inp->inp_socket->so_snd)) /* Must need to send more data to enter persist */ return; rack->r_ctl.rc_went_idle_time = cts; rack_timer_cancel(tp, rack, cts, __LINE__); tp->t_rxtshift = 0; rack->rc_in_persist = 1; } } static void rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack) { if (rack->rc_inp->inp_in_hpts) { tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); rack->r_ctl.rc_hpts_flags = 0; } rack->rc_in_persist = 0; rack->r_ctl.rc_went_idle_time = 0; tp->t_flags &= ~TF_FORCEDATA; tp->t_rxtshift = 0; } static void rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, int32_t line, int32_t slot, uint32_t tot_len_this_send, int32_t frm_out_sbavail) { struct inpcb *inp; uint32_t delayed_ack = 0; uint32_t hpts_timeout; uint8_t stopped; uint32_t left = 0; inp = tp->t_inpcb; if (inp->inp_in_hpts) { /* A previous call is already set up */ return; } if ((tp->t_state == TCPS_CLOSED) || (tp->t_state == TCPS_LISTEN)) { return; } stopped = rack->rc_tmr_stopped; if (stopped && TSTMP_GT(rack->r_ctl.rc_timer_exp, cts)) { left = rack->r_ctl.rc_timer_exp - cts; } rack->r_ctl.rc_timer_exp = 0; if (rack->rc_inp->inp_in_hpts == 0) { rack->r_ctl.rc_hpts_flags = 0; } if (slot) { /* We are hptsi too */ rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; } else if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) { /* * We are still left on the hpts when the to goes * it will be for output. */ if (TSTMP_GT(rack->r_ctl.rc_last_output_to, cts)) slot = rack->r_ctl.rc_last_output_to - cts; else slot = 1; } if ((tp->snd_wnd == 0) && TCPS_HAVEESTABLISHED(tp->t_state)) { /* No send window.. we must enter persist */ rack_enter_persist(tp, rack, cts); } else if ((frm_out_sbavail && (frm_out_sbavail > (tp->snd_max - tp->snd_una)) && (tp->snd_wnd < tp->t_maxseg)) && TCPS_HAVEESTABLISHED(tp->t_state)) { /* * If we have no window or we can't send a segment (and have * data to send.. we cheat here and frm_out_sbavail is * passed in with the sbavail(sb) only from bbr_output) and * we are established, then we must enter persits (if not * already in persits). */ rack_enter_persist(tp, rack, cts); } hpts_timeout = rack_timer_start(tp, rack, cts); if (tp->t_flags & TF_DELACK) { delayed_ack = TICKS_2_MSEC(tcp_delacktime); rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK; } if (delayed_ack && ((hpts_timeout == 0) || (delayed_ack < hpts_timeout))) hpts_timeout = delayed_ack; else rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK; /* * If no timers are going to run and we will fall off the hptsi * wheel, we resort to a keep-alive timer if its configured. */ if ((hpts_timeout == 0) && (slot == 0)) { if ((tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) { /* * Ok we have no timer (persists, rack, tlp, rxt or * del-ack), we don't have segments being paced. So * all that is left is the keepalive timer. 
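 * This keeps at least one event scheduled on the hpts wheel for the connection; with no
 * pacing slot and no other timer armed, nothing would otherwise call back into the stack
 * for this tcpcb.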
*/ if (TCPS_HAVEESTABLISHED(tp->t_state)) { /* Get the established keep-alive time */ hpts_timeout = TP_KEEPIDLE(tp); } else { /* Get the initial setup keep-alive time */ hpts_timeout = TP_KEEPINIT(tp); } rack->r_ctl.rc_hpts_flags |= PACE_TMR_KEEP; } } if (left && (stopped & (PACE_TMR_KEEP | PACE_TMR_DELACK)) == (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK)) { /* * RACK, TLP, persists and RXT timers all are restartable * based on actions input .. i.e we received a packet (ack * or sack) and that changes things (rw, or snd_una etc). * Thus we can restart them with a new value. For * keep-alive, delayed_ack we keep track of what was left * and restart the timer with a smaller value. */ if (left < hpts_timeout) hpts_timeout = left; } if (hpts_timeout) { /* * Hack alert for now we can't time-out over 2,147,483 * seconds (a bit more than 596 hours), which is probably ok * :). */ if (hpts_timeout > 0x7ffffffe) hpts_timeout = 0x7ffffffe; rack->r_ctl.rc_timer_exp = cts + hpts_timeout; } if (slot) { rack->r_ctl.rc_last_output_to = cts + slot; if ((hpts_timeout == 0) || (hpts_timeout > slot)) { if (rack->rc_inp->inp_in_hpts == 0) tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(slot)); rack_log_to_start(rack, cts, hpts_timeout, slot, 1); } else { /* * Arrange for the hpts to kick back in after the * t-o if the t-o does not cause a send. */ if (rack->rc_inp->inp_in_hpts == 0) tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(hpts_timeout)); rack_log_to_start(rack, cts, hpts_timeout, slot, 0); } } else if (hpts_timeout) { if (rack->rc_inp->inp_in_hpts == 0) tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(hpts_timeout)); rack_log_to_start(rack, cts, hpts_timeout, slot, 0); } else { /* No timer starting */ #ifdef INVARIANTS if (SEQ_GT(tp->snd_max, tp->snd_una)) { panic("tp:%p rack:%p tlts:%d cts:%u slot:%u pto:%u -- no timer started?", tp, rack, tot_len_this_send, cts, slot, hpts_timeout); } #endif } rack->rc_tmr_stopped = 0; if (slot) rack_log_type_bbrsnd(rack, tot_len_this_send, slot, cts); } /* * RACK Timer, here we simply do logging and house keeping. * the normal rack_output() function will call the * appropriate thing to check if we need to do a RACK retransmit. * We return 1, saying don't proceed with rack_output only * when all timers have been stopped (destroyed PCB?). */ static int rack_timeout_rack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { /* * This timer simply provides an internal trigger to send out data. * The check_recovery_mode call will see if there are needed * retransmissions, if so we will enter fast-recovery. The output * call may or may not do the same thing depending on sysctl * settings. */ struct rack_sendmap *rsm; int32_t recovery; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { /* Its not time yet */ return (0); } rack_log_to_event(rack, RACK_TO_FRM_RACK); recovery = IN_RECOVERY(tp->t_flags); counter_u64_add(rack_to_tot, 1); if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); rsm = rack_check_recovery_mode(tp, cts); if (rsm) { uint32_t rtt; rtt = rack->rc_rack_rtt; if (rtt == 0) rtt = 1; if ((recovery == 0) && (rack->r_ctl.rc_prr_sndcnt < tp->t_maxseg)) { /* * The rack-timeout that enter's us into recovery * will force out one MSS and set us up so that we * can do one more send in 2*rtt (transitioning the * rack timeout into a rack-tlp). 
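 * That is, rc_prr_sndcnt is primed with one MSS of send credit so the output path may
 * transmit immediately, even though the proportional-rate-reduction accounting may not yet
 * have granted anything.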
*/ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } else if ((rack->r_ctl.rc_prr_sndcnt < tp->t_maxseg) && ((rsm->r_end - rsm->r_start) > rack->r_ctl.rc_prr_sndcnt)) { /* * When a rack timer goes, we have to send at * least one segment. They will be paced a min of 1ms * apart via the next rack timer (or further * if the rack timer dictates it). */ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } } else { /* This is a case that should happen rarely if ever */ counter_u64_add(rack_tlp_does_nada, 1); #ifdef TCP_BLACKBOX tcp_log_dump_tp_logbuf(tp, "nada counter trips", M_NOWAIT, true); #endif rack->r_ctl.rc_resend = TAILQ_FIRST(&rack->r_ctl.rc_tmap); } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RACK; return (0); } static struct rack_sendmap * rack_merge_rsm(struct tcp_rack *rack, struct rack_sendmap *l_rsm, struct rack_sendmap *r_rsm) { /* * We are merging two ack'd RSM's, * the l_rsm is on the left (lower seq * values) and the r_rsm is on the right * (higher seq value). The simplest way * to merge these is to move the right * one into the left. I don't think there * is any reason we need to try to find * the oldest (or last oldest retransmitted). */ l_rsm->r_end = r_rsm->r_end; if (r_rsm->r_rtr_bytes) l_rsm->r_rtr_bytes += r_rsm->r_rtr_bytes; if (r_rsm->r_in_tmap) { /* This really should not happen */ TAILQ_REMOVE(&rack->r_ctl.rc_tmap, r_rsm, r_tnext); } /* Now the flags */ if (r_rsm->r_flags & RACK_HAS_FIN) l_rsm->r_flags |= RACK_HAS_FIN; if (r_rsm->r_flags & RACK_TLP) l_rsm->r_flags |= RACK_TLP; TAILQ_REMOVE(&rack->r_ctl.rc_map, r_rsm, r_next); if ((r_rsm->r_limit_type == 0) && (l_rsm->r_limit_type != 0)) { /* Transfer the split limit to the map we free */ r_rsm->r_limit_type = l_rsm->r_limit_type; l_rsm->r_limit_type = 0; } rack_free(rack, r_rsm); return(l_rsm); } /* * TLP Timer, here we simply setup what segment we want to * have the TLP expire on, the normal rack_output() will then * send it out. * * We return 1, saying don't proceed with rack_output only * when all timers have been stopped (destroyed PCB?). */ static int rack_timeout_tlp(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { /* * Tail Loss Probe. */ struct rack_sendmap *rsm = NULL; struct socket *so; uint32_t amm, old_prr_snd = 0; uint32_t out, avail; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { /* Its not time yet */ return (0); } if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); return (1); } /* * A TLP timer has expired. We have been idle for 2 rtts. So we now * need to figure out how to force a full MSS segment out. */ rack_log_to_event(rack, RACK_TO_FRM_TLP); counter_u64_add(rack_tlp_tot, 1); if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); so = tp->t_inpcb->inp_socket; avail = sbavail(&so->so_snd); out = tp->snd_max - tp->snd_una; rack->rc_timer_up = 1; /* * If we are in recovery we can jazz out a segment if new data is * present simply by setting rc_prr_sndcnt to a segment. 
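 * Preference is given to probing with previously unsent data; only when less than a full
 * MSS of new data is available and TF_NODELAY is off, or the window cannot cover it, do we
 * fall back to retransmitting the last unacked segment below.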
*/ if ((avail > out) && ((rack_always_send_oldest == 0) || (TAILQ_EMPTY(&rack->r_ctl.rc_tmap)))) { /* New data is available */ amm = avail - out; if (amm > tp->t_maxseg) { amm = tp->t_maxseg; } else if ((amm < tp->t_maxseg) && ((tp->t_flags & TF_NODELAY) == 0)) { /* not enough to fill a MTU and no-delay is off */ goto need_retran; } if (IN_RECOVERY(tp->t_flags)) { /* Unlikely */ old_prr_snd = rack->r_ctl.rc_prr_sndcnt; if (out + amm <= tp->snd_wnd) rack->r_ctl.rc_prr_sndcnt = amm; else goto need_retran; } else { /* Set the send-new override */ if (out + amm <= tp->snd_wnd) rack->r_ctl.rc_tlp_new_data = amm; else goto need_retran; } rack->r_ctl.rc_tlp_seg_send_cnt = 0; rack->r_ctl.rc_last_tlp_seq = tp->snd_max; rack->r_ctl.rc_tlpsend = NULL; counter_u64_add(rack_tlp_newdata, 1); goto send; } need_retran: /* * Ok we need to arrange the last un-acked segment to be re-sent, or * optionally the first un-acked segment. */ if (rack_always_send_oldest) rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); else { rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_map, rack_sendmap, r_next); if (rsm && (rsm->r_flags & (RACK_ACKED | RACK_HAS_FIN))) { rsm = rack_find_high_nonack(rack, rsm); } } if (rsm == NULL) { counter_u64_add(rack_tlp_does_nada, 1); #ifdef TCP_BLACKBOX tcp_log_dump_tp_logbuf(tp, "nada counter trips", M_NOWAIT, true); #endif goto out; } if ((rsm->r_end - rsm->r_start) > tp->t_maxseg) { /* * We need to split this the last segment in two. */ int32_t idx; struct rack_sendmap *nrsm; nrsm = rack_alloc_full_limit(rack); if (nrsm == NULL) { /* * No memory to split, we will just exit and punt * off to the RXT timer. */ counter_u64_add(rack_tlp_does_nada, 1); goto out; } nrsm->r_start = (rsm->r_end - tp->t_maxseg); nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; nrsm->r_rtr_bytes = 0; rsm->r_end = nrsm->r_start; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); rsm = nrsm; } rack->r_ctl.rc_tlpsend = rsm; rack->r_ctl.rc_tlp_rtx_out = 1; if (rsm->r_start == rack->r_ctl.rc_last_tlp_seq) { rack->r_ctl.rc_tlp_seg_send_cnt++; tp->t_rxtshift++; } else { rack->r_ctl.rc_last_tlp_seq = rsm->r_start; rack->r_ctl.rc_tlp_seg_send_cnt = 1; } send: rack->r_ctl.rc_tlp_send_cnt++; if (rack->r_ctl.rc_tlp_send_cnt > rack_tlp_max_resend) { /* * Can't [re]/transmit a segment we have not heard from the * peer in max times. We need the retransmit timer to take * over. */ restore: rack->r_ctl.rc_tlpsend = NULL; if (rsm) rsm->r_flags &= ~RACK_TLP; rack->r_ctl.rc_prr_sndcnt = old_prr_snd; counter_u64_add(rack_tlp_retran_fail, 1); goto out; } else if (rsm) { rsm->r_flags |= RACK_TLP; } if (rsm && (rsm->r_start == rack->r_ctl.rc_last_tlp_seq) && (rack->r_ctl.rc_tlp_seg_send_cnt > rack_tlp_max_resend)) { /* * We don't want to send a single segment more than the max * either. */ goto restore; } rack->r_timer_override = 1; rack->r_tlp_running = 1; rack->rc_tlp_in_progress = 1; rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_TLP; return (0); out: rack->rc_timer_up = 0; rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_TLP; return (0); } /* * Delayed ack Timer, here we simply need to setup the * ACK_NOW flag and remove the DELACK flag. From there * the output routine will send the ack out. 
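 * TCPSTAT_INC(tcps_delack) below keeps the delayed-ACK accounting consistent with the
 * default timer code, so these firings still show up in the usual statistics.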
* * We only return 1, saying don't proceed, if all timers * are stopped (destroyed PCB?). */ static int rack_timeout_delack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } rack_log_to_event(rack, RACK_TO_FRM_DELACK); tp->t_flags &= ~TF_DELACK; tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK; return (0); } /* * Persists timer, here we simply need to setup the * FORCE-DATA flag the output routine will send * the one byte send. * * We only return 1, saying don't proceed, if all timers * are stopped (destroyed PCB?). */ static int rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { struct inpcb *inp; int32_t retval = 0; inp = tp->t_inpcb; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (rack->rc_in_persist == 0) return (0); if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(inp, ETIMEDOUT); return (1); } KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); /* * Persistence timer into zero window. Force a byte to be output, if * possible. */ TCPSTAT_INC(tcps_persisttimeo); /* * Hack: if the peer is dead/unreachable, we do not time out if the * window is closed. After a full backoff, drop the connection if * the idle time (no responses to probes) reaches the maximum * backoff that we would use if retransmitting. */ if (tp->t_rxtshift == TCP_MAXRXTSHIFT && (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { TCPSTAT_INC(tcps_persistdrop); retval = 1; tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); goto out; } if ((sbavail(&rack->rc_inp->inp_socket->so_snd) == 0) && tp->snd_una == tp->snd_max) rack_exit_persist(tp, rack); rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_PERSIT; /* * If the user has closed the socket then drop a persisting * connection after a much reduced timeout. */ if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { retval = 1; TCPSTAT_INC(tcps_persistdrop); tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); goto out; } tp->t_flags |= TF_FORCEDATA; out: rack_log_to_event(rack, RACK_TO_FRM_PERSIST); return (retval); } /* * If a keepalive goes off, we had no other timers * happening. We always return 1 here since this * routine either drops the connection or sends * out a segment with respond. */ static int rack_timeout_keepalive(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { struct tcptemp *t_template; struct inpcb *inp; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP; inp = tp->t_inpcb; rack_log_to_event(rack, RACK_TO_FRM_KEEP); /* * Keep-alive timer went off; send something or drop connection if * idle for too long. */ TCPSTAT_INC(tcps_keeptimeo); if (tp->t_state < TCPS_ESTABLISHED) goto dropit; if ((tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) goto dropit; /* * Send a packet designed to force a response if the peer is * up and reachable: either an ACK if the connection is * still alive, or an RST if the peer has closed the * connection due to timeout or reboot. Using sequence * number tp->snd_una-1 causes the transmitted zero-length * segment to lie outside the receive window; by the * protocol spec, this requires the correspondent TCP to * respond. 
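 * tcpip_maketemplate() below builds the header template used for the probe; if its
 * allocation fails we simply skip this round, and the timer restart at the end schedules
 * the next keepalive.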
*/ TCPSTAT_INC(tcps_keepprobe); t_template = tcpip_maketemplate(inp); if (t_template) { tcp_respond(tp, t_template->tt_ipgen, &t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0); free(t_template, M_TEMP); } } rack_start_hpts_timer(rack, tp, cts, __LINE__, 0, 0, 0); return (1); dropit: TCPSTAT_INC(tcps_keepdrops); tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); return (1); } /* * Retransmit helper function, clear up all the ack * flags and take care of important book keeping. */ static void rack_remxt_tmr(struct tcpcb *tp) { /* * The retransmit timer went off, all sack'd blocks must be * un-acked. */ struct rack_sendmap *rsm, *trsm = NULL; struct tcp_rack *rack; int32_t cnt = 0; rack = (struct tcp_rack *)tp->t_fb_ptr; rack_timer_cancel(tp, rack, tcp_ts_getticks(), __LINE__); rack_log_to_event(rack, RACK_TO_FRM_TMR); if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); /* * Ideally we would like to be able to * mark SACK-PASS on anything not acked here. * However, if we do that we would burst out * all that data 1ms apart. This would be unwise, * so for now we will just let the normal rxt timer * and tlp timer take care of it. */ TAILQ_FOREACH(rsm, &rack->r_ctl.rc_map, r_next) { if (rsm->r_flags & RACK_ACKED) { cnt++; rsm->r_sndcnt = 0; if (rsm->r_in_tmap == 0) { /* We must re-add it back to the tlist */ if (trsm == NULL) { TAILQ_INSERT_HEAD(&rack->r_ctl.rc_tmap, rsm, r_tnext); } else { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, trsm, rsm, r_tnext); } rsm->r_in_tmap = 1; trsm = rsm; } } rsm->r_flags &= ~(RACK_ACKED | RACK_SACK_PASSED | RACK_WAS_SACKPASS); } /* Clear the count (we just un-acked them) */ rack->r_ctl.rc_sacked = 0; /* Clear the tlp rtx mark */ rack->r_ctl.rc_tlp_rtx_out = 0; rack->r_ctl.rc_tlp_seg_send_cnt = 0; rack->r_ctl.rc_resend = TAILQ_FIRST(&rack->r_ctl.rc_map); /* Setup so we send one segment */ if (rack->r_ctl.rc_prr_sndcnt < tp->t_maxseg) rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; rack->r_timer_override = 1; } /* * Re-transmit timeout! If we drop the PCB we will return 1, otherwise * we will setup to retransmit the lowest seq number outstanding. */ static int rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { int32_t rexmt; struct inpcb *inp; int32_t retval = 0; inp = tp->t_inpcb; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(inp, ETIMEDOUT); return (1); } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RXT; if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->snd_una == tp->snd_max)) { /* Nothing outstanding .. nothing to do */ return (0); } /* * Retransmission timer went off. Message has not been acked within * retransmit interval. Back off to a longer retransmit interval * and retransmit one segment. */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); retval = 1; tcp_set_inp_to_drop(rack->rc_inp, (tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT)); goto out; } rack_remxt_tmr(tp); if (tp->t_state == TCPS_SYN_SENT) { /* * If the SYN was retransmitted, indicate CWND to be limited * to 1 segment in cc_conn_init(). */ tp->snd_cwnd = 1; } else if (tp->t_rxtshift == 1) { /* * first retransmit; record ssthresh and cwnd so they can be * recovered if this turns out to be a "bad" retransmit. A * retransmit is considered "bad" if an ACK for this segment * is received within RTT/2 interval; the assumption here is * that the ACK was already in flight. 
See "On Estimating * End-to-End Network Path Properties" by Allman and Paxson * for more details. */ tp->snd_cwnd_prev = tp->snd_cwnd; tp->snd_ssthresh_prev = tp->snd_ssthresh; tp->snd_recover_prev = tp->snd_recover; if (IN_FASTRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASFRECOVERY; else tp->t_flags &= ~TF_WASFRECOVERY; if (IN_CONGRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASCRECOVERY; else tp->t_flags &= ~TF_WASCRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); tp->t_flags |= TF_PREVVALID; } else tp->t_flags &= ~TF_PREVVALID; TCPSTAT_INC(tcps_rexmttimeo); if ((tp->t_state == TCPS_SYN_SENT) || (tp->t_state == TCPS_SYN_RECEIVED)) rexmt = MSEC_2_TICKS(RACK_INITIAL_RTO * tcp_syn_backoff[tp->t_rxtshift]); else rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; TCPT_RANGESET(tp->t_rxtcur, rexmt, max(MSEC_2_TICKS(rack_rto_min), rexmt), MSEC_2_TICKS(rack_rto_max)); /* * We enter the path for PLMTUD if connection is established or, if * connection is FIN_WAIT_1 status, reason for the last is that if * amount of data we send is very small, we could send it in couple * of packets and process straight to FIN. In that case we won't * catch ESTABLISHED state. */ if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED)) || (tp->t_state == TCPS_FIN_WAIT_1))) { #ifdef INET6 int32_t isipv6; #endif /* * Idea here is that at each stage of mtu probe (usually, * 1448 -> 1188 -> 524) should be given 2 chances to recover * before further clamping down. 'tp->t_rxtshift % 2 == 0' * should take care of that. */ if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD | TF2_PLPMTU_MAXSEGSNT)) == (TF2_PLPMTU_PMTUD | TF2_PLPMTU_MAXSEGSNT)) && (tp->t_rxtshift >= 2 && tp->t_rxtshift < 6 && tp->t_rxtshift % 2 == 0)) { /* * Enter Path MTU Black-hole Detection mechanism: - * Disable Path MTU Discovery (IP "DF" bit). - * Reduce MTU to lower value than what we negotiated * with peer. */ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) == 0) { /* Record that we may have found a black hole. */ tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; /* Keep track of previous MSS. */ tp->t_pmtud_saved_maxseg = tp->t_maxseg; } /* * Reduce the MSS to blackhole value or to the * default in an attempt to retransmit. */ #ifdef INET6 isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0; if (isipv6 && tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) { /* Use the sysctl tuneable blackhole MSS. */ tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss; TCPSTAT_INC(tcps_pmtud_blackhole_activated); } else if (isipv6) { /* Use the default MSS. */ tp->t_maxseg = V_tcp_v6mssdflt; /* * Disable Path MTU Discovery when we switch * to minmss. */ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) { /* Use the sysctl tuneable blackhole MSS. */ tp->t_maxseg = V_tcp_pmtud_blackhole_mss; TCPSTAT_INC(tcps_pmtud_blackhole_activated); } else { /* Use the default MSS. */ tp->t_maxseg = V_tcp_mssdflt; /* * Disable Path MTU Discovery when we switch * to minmss. */ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); } #endif } else { /* * If further retransmissions are still unsuccessful * with a lowered MTU, maybe this isn't a blackhole * and we restore the previous MSS and blackhole * detection flags. The limit '6' is determined by * giving each probe stage (1448, 1188, 524) 2 * chances to recover. 
*/ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && (tp->t_rxtshift >= 6)) { tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } } /* * Disable RFC1323 and SACK if we haven't got any response to our * third SYN to work-around some broken terminal servers (most of * which have hopefully been retired) that have bad VJ header * compression code which trashes TCP segments containing * unknown-to-them TCP options. */ if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) tp->t_flags &= ~(TF_REQ_SCALE | TF_REQ_TSTMP | TF_SACK_PERMIT); /* * If we backed off this far, our srtt estimate is probably bogus. * Clobber it so we'll take the next rtt measurement as our srtt; * move the current srtt into rttvar to keep the current retransmit * times until then. */ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) in6_losing(tp->t_inpcb); else #endif in_losing(tp->t_inpcb); tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); tp->t_srtt = 0; } if (rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); tp->snd_recover = tp->snd_max; tp->t_flags |= TF_ACKNOW; tp->t_rtttime = 0; rack_cong_signal(tp, NULL, CC_RTO); out: return (retval); } static int rack_process_timers(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8_t hpts_calling) { int32_t ret = 0; int32_t timers = (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK); if (timers == 0) { return (0); } if (tp->t_state == TCPS_LISTEN) { /* no timers on listen sockets */ if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) return (0); return (1); } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { uint32_t left; if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) { ret = -1; rack_log_to_processing(rack, cts, ret, 0); return (0); } if (hpts_calling == 0) { ret = -2; rack_log_to_processing(rack, cts, ret, 0); return (0); } /* * Ok our timer went off early and we are not paced false * alarm, go back to sleep. */ ret = -3; left = rack->r_ctl.rc_timer_exp - cts; tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(left)); rack_log_to_processing(rack, cts, ret, left); rack->rc_last_pto_set = 0; return (1); } rack->rc_tmr_stopped = 0; rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_MASK; if (timers & PACE_TMR_DELACK) { ret = rack_timeout_delack(tp, rack, cts); } else if (timers & PACE_TMR_RACK) { ret = rack_timeout_rack(tp, rack, cts); } else if (timers & PACE_TMR_TLP) { ret = rack_timeout_tlp(tp, rack, cts); } else if (timers & PACE_TMR_RXT) { ret = rack_timeout_rxt(tp, rack, cts); } else if (timers & PACE_TMR_PERSIT) { ret = rack_timeout_persist(tp, rack, cts); } else if (timers & PACE_TMR_KEEP) { ret = rack_timeout_keepalive(tp, rack, cts); } rack_log_to_processing(rack, cts, ret, timers); return (ret); } static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line) { uint8_t hpts_removed = 0; if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) && TSTMP_GEQ(cts, rack->r_ctl.rc_last_output_to)) { tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); hpts_removed = 1; } if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) { rack->rc_tmr_stopped = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK; if (rack->rc_inp->inp_in_hpts && ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0)) { /* * Canceling timer's when we have no output being * paced. We also must remove ourselves from the * hpts. 
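 * When output is still being paced we leave the inpcb on the wheel and only clear the
 * timer flags; cancelling a timer must not cost us the pacing slot.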
*/ tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); hpts_removed = 1; } rack_log_to_cancel(rack, hpts_removed, line); rack->r_ctl.rc_hpts_flags &= ~(PACE_TMR_MASK); } } static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type) { return; } static int rack_stopall(struct tcpcb *tp) { struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; rack->t_timers_stopped = 1; return (0); } static void rack_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta) { return; } static int rack_timer_active(struct tcpcb *tp, uint32_t timer_type) { return (0); } static void rack_stop_all_timers(struct tcpcb *tp) { struct tcp_rack *rack; /* * Assure no timers are running. */ if (tcp_timer_active(tp, TT_PERSIST)) { /* We enter in persists, set the flag appropriately */ rack = (struct tcp_rack *)tp->t_fb_ptr; rack->rc_in_persist = 1; } tcp_timer_suspend(tp, TT_PERSIST); tcp_timer_suspend(tp, TT_REXMT); tcp_timer_suspend(tp, TT_KEEP); tcp_timer_suspend(tp, TT_DELACK); } static void rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t ts) { int32_t idx; rsm->r_rtr_cnt++; rsm->r_sndcnt++; if (rsm->r_rtr_cnt > RACK_NUM_OF_RETRANS) { rsm->r_rtr_cnt = RACK_NUM_OF_RETRANS; rsm->r_flags |= RACK_OVERMAX; } if ((rsm->r_rtr_cnt > 1) && (rack->r_tlp_running == 0)) { rack->r_ctl.rc_holes_rxt += (rsm->r_end - rsm->r_start); rsm->r_rtr_bytes += (rsm->r_end - rsm->r_start); } idx = rsm->r_rtr_cnt - 1; rsm->r_tim_lastsent[idx] = ts; if (rsm->r_flags & RACK_ACKED) { /* Problably MTU discovery messing with us */ rsm->r_flags &= ~RACK_ACKED; rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start); } if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); } TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 1; if (rsm->r_flags & RACK_SACK_PASSED) { /* We have retransmitted due to the SACK pass */ rsm->r_flags &= ~RACK_SACK_PASSED; rsm->r_flags |= RACK_WAS_SACKPASS; } /* Update memory for next rtr */ rack->r_ctl.rc_next = TAILQ_NEXT(rsm, r_next); } static uint32_t rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t ts, int32_t * lenp) { /* * We (re-)transmitted starting at rsm->r_start for some length * (possibly less than r_end. */ struct rack_sendmap *nrsm; uint32_t c_end; int32_t len; int32_t idx; len = *lenp; c_end = rsm->r_start + len; if (SEQ_GEQ(c_end, rsm->r_end)) { /* * We retransmitted the whole piece or more than the whole * slopping into the next rsm. */ rack_update_rsm(tp, rack, rsm, ts); if (c_end == rsm->r_end) { *lenp = 0; return (0); } else { int32_t act_len; /* Hangs over the end return whats left */ act_len = rsm->r_end - rsm->r_start; *lenp = (len - act_len); return (rsm->r_end); } /* We don't get out of this block. */ } /* * Here we retransmitted less than the whole thing which means we * have to split this into what was transmitted and what was not. */ nrsm = rack_alloc_full_limit(rack); if (nrsm == NULL) { /* * We can't get memory, so lets not proceed. */ *lenp = 0; return (0); } /* * So here we are going to take the original rsm and make it what we * retransmitted. nrsm will be the tail portion we did not * retransmit. For example say the chunk was 1, 11 (10 bytes). And * we retransmitted 5 bytes i.e. 1, 5. The original piece shrinks to * 1, 6 and the new piece will be 6, 11. 
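 * In that example c_end = 1 + 5 = 6, so below rsm keeps [1, 6) and nrsm records the
 * untransmitted tail [6, 11); nrsm also inherits the transmit timestamps so later RTT and
 * SACK accounting still line up.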
*/ nrsm->r_start = c_end; nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; nrsm->r_rtr_bytes = 0; rsm->r_end = c_end; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); rack_update_rsm(tp, rack, rsm, ts); *lenp = 0; return (0); } static void rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, uint32_t seq_out, uint8_t th_flags, int32_t err, uint32_t ts, uint8_t pass, struct rack_sendmap *hintrsm) { struct tcp_rack *rack; struct rack_sendmap *rsm, *nrsm; register uint32_t snd_max, snd_una; int32_t idx; /* * Add to the RACK log of packets in flight or retransmitted. If * there is a TS option we will use the TS echoed, if not we will * grab a TS. * * Retransmissions will increment the count and move the ts to its * proper place. Note that if options do not include TS's then we * won't be able to effectively use the ACK for an RTT on a retran. * * Notes about r_start and r_end. Lets consider a send starting at * sequence 1 for 10 bytes. In such an example the r_start would be * 1 (starting sequence) but the r_end would be r_start+len i.e. 11. * This means that r_end is actually the first sequence for the next * slot (11). * */ /* * If err is set what do we do XXXrrs? should we not add the thing? * -- i.e. return if err != 0 or should we pretend we sent it? -- * i.e. proceed with add ** do this for now. */ INP_WLOCK_ASSERT(tp->t_inpcb); if (err) /* * We don't log errors -- we could but snd_max does not * advance in this case either. */ return; if (th_flags & TH_RST) { /* * We don't log resets and we return immediately from * sending */ return; } rack = (struct tcp_rack *)tp->t_fb_ptr; snd_una = tp->snd_una; if (SEQ_LEQ((seq_out + len), snd_una)) { /* Are sending an old segment to induce an ack (keep-alive)? */ return; } if (SEQ_LT(seq_out, snd_una)) { /* huh? should we panic? */ uint32_t end; end = seq_out + len; seq_out = snd_una; len = end - seq_out; } snd_max = tp->snd_max; if (th_flags & (TH_SYN | TH_FIN)) { /* * The call to rack_log_output is made before bumping * snd_max. This means we can record one extra byte on a SYN * or FIN if seq_out is adding more on and a FIN is present * (and we are not resending). */ if (th_flags & TH_SYN) len++; if (th_flags & TH_FIN) len++; if (SEQ_LT(snd_max, tp->snd_nxt)) { /* * The add/update as not been done for the FIN/SYN * yet. */ snd_max = tp->snd_nxt; } } if (len == 0) { /* We don't log zero window probes */ return; } rack->r_ctl.rc_time_last_sent = ts; if (IN_RECOVERY(tp->t_flags)) { rack->r_ctl.rc_prr_out += len; } /* First question is it a retransmission? */ if (seq_out == snd_max) { again: rsm = rack_alloc(rack); if (rsm == NULL) { /* * Hmm out of memory and the tcb got destroyed while * we tried to wait. */ return; } if (th_flags & TH_FIN) { rsm->r_flags = RACK_HAS_FIN; } else { rsm->r_flags = 0; } rsm->r_tim_lastsent[0] = ts; rsm->r_rtr_cnt = 1; rsm->r_rtr_bytes = 0; rsm->r_start = seq_out; rsm->r_end = rsm->r_start + len; rsm->r_sndcnt = 0; TAILQ_INSERT_TAIL(&rack->r_ctl.rc_map, rsm, r_next); TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 1; return; } /* * If we reach here its a retransmission and we need to find it. 
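 * The lookup below tries, in order: the hint supplied by the caller (hintrsm), the rc_next
 * hint remembered from the previous call, and finally a linear walk of the whole rc_map.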
*/ more: if (hintrsm && (hintrsm->r_start == seq_out)) { rsm = hintrsm; hintrsm = NULL; } else if (rack->r_ctl.rc_next) { /* We have a hint from a previous run */ rsm = rack->r_ctl.rc_next; } else { /* No hints sorry */ rsm = NULL; } if ((rsm) && (rsm->r_start == seq_out)) { /* * We used rc_next or hintrsm to retransmit, hopefully the * likely case. */ seq_out = rack_update_entry(tp, rack, rsm, ts, &len); if (len == 0) { return; } else { goto more; } } /* Ok it was not the last pointer go through it the hard way. */ TAILQ_FOREACH(rsm, &rack->r_ctl.rc_map, r_next) { if (rsm->r_start == seq_out) { seq_out = rack_update_entry(tp, rack, rsm, ts, &len); rack->r_ctl.rc_next = TAILQ_NEXT(rsm, r_next); if (len == 0) { return; } else { continue; } } if (SEQ_GEQ(seq_out, rsm->r_start) && SEQ_LT(seq_out, rsm->r_end)) { /* Transmitted within this piece */ /* * Ok we must split off the front and then let the * update do the rest */ nrsm = rack_alloc_full_limit(rack); if (nrsm == NULL) { rack_update_rsm(tp, rack, rsm, ts); return; } /* * copy rsm to nrsm and then trim the front of rsm * to not include this part. */ nrsm->r_start = seq_out; nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; nrsm->r_rtr_bytes = 0; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } rsm->r_end = nrsm->r_start; TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); seq_out = rack_update_entry(tp, rack, nrsm, ts, &len); if (len == 0) { return; } } } /* * Hmm not found in map did they retransmit both old and on into the * new? */ if (seq_out == tp->snd_max) { goto again; } else if (SEQ_LT(seq_out, tp->snd_max)) { #ifdef INVARIANTS printf("seq_out:%u len:%d snd_una:%u snd_max:%u -- but rsm not found?\n", seq_out, len, tp->snd_una, tp->snd_max); printf("Starting Dump of all rack entries\n"); TAILQ_FOREACH(rsm, &rack->r_ctl.rc_map, r_next) { printf("rsm:%p start:%u end:%u\n", rsm, rsm->r_start, rsm->r_end); } printf("Dump complete\n"); panic("seq_out not found rack:%p tp:%p", rack, tp); #endif } else { #ifdef INVARIANTS /* * Hmm beyond sndmax? (only if we are using the new rtt-pack * flag) */ panic("seq_out:%u(%d) is beyond snd_max:%u tp:%p", seq_out, len, tp->snd_max, tp); #endif } } /* * Record one of the RTT updates from an ack into * our sample structure. */ static void tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt) { if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) || (rack->r_ctl.rack_rs.rs_rtt_lowest > rtt)) { rack->r_ctl.rack_rs.rs_rtt_lowest = rtt; } if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) || (rack->r_ctl.rack_rs.rs_rtt_highest < rtt)) { rack->r_ctl.rack_rs.rs_rtt_highest = rtt; } rack->r_ctl.rack_rs.rs_flags = RACK_RTT_VALID; rack->r_ctl.rack_rs.rs_rtt_tot += rtt; rack->r_ctl.rack_rs.rs_rtt_cnt++; } /* * Collect new round-trip time estimate * and update averages and current timeout. 
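 * rack_rs holds the lowest, highest, total and count of RTT samples taken from a single
 * ACK; tcp_rack_xmit_timer_commit() below folds one of them, chosen by
 * rc_rate_sample_method, into the classic srtt/rttvar smoothing.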
*/ static void tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp) { int32_t delta; uint32_t o_srtt, o_var; int32_t rtt; if (rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) /* No valid sample */ return; if (rack->r_ctl.rc_rate_sample_method == USE_RTT_LOW) { /* We are to use the lowest RTT seen in a single ack */ rtt = rack->r_ctl.rack_rs.rs_rtt_lowest; } else if (rack->r_ctl.rc_rate_sample_method == USE_RTT_HIGH) { /* We are to use the highest RTT seen in a single ack */ rtt = rack->r_ctl.rack_rs.rs_rtt_highest; } else if (rack->r_ctl.rc_rate_sample_method == USE_RTT_AVG) { /* We are to use the average RTT seen in a single ack */ rtt = (int32_t)(rack->r_ctl.rack_rs.rs_rtt_tot / (uint64_t)rack->r_ctl.rack_rs.rs_rtt_cnt); } else { #ifdef INVARIANTS panic("Unknown rtt variant %d", rack->r_ctl.rc_rate_sample_method); #endif return; } if (rtt == 0) rtt = 1; rack_log_rtt_sample(rack, rtt); o_srtt = tp->t_srtt; o_var = tp->t_rttvar; rack = (struct tcp_rack *)tp->t_fb_ptr; if (tp->t_srtt != 0) { /* * srtt is stored as fixed point with 5 bits after the * binary point (i.e., scaled by 8). The following magic is * equivalent to the smoothing algorithm in rfc793 with an * alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed point). * Adjust rtt to origin 0. */ delta = ((rtt - 1) << TCP_DELTA_SHIFT) - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)); tp->t_srtt += delta; if (tp->t_srtt <= 0) tp->t_srtt = 1; /* * We accumulate a smoothed rtt variance (actually, a * smoothed mean difference), then set the retransmit timer * to smoothed rtt + 4 times the smoothed variance. rttvar * is stored as fixed point with 4 bits after the binary * point (scaled by 16). The following is equivalent to * rfc793 smoothing with an alpha of .75 (rttvar = * rttvar*3/4 + |delta| / 4). This replaces rfc793's * wired-in beta. */ if (delta < 0) delta = -delta; delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT); tp->t_rttvar += delta; if (tp->t_rttvar <= 0) tp->t_rttvar = 1; if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar) tp->t_rttbest = tp->t_srtt + tp->t_rttvar; } else { /* * No rtt measurement yet - use the unsmoothed rtt. Set the * variance to half the rtt (so our first retransmit happens * at 3*rtt). */ tp->t_srtt = rtt << TCP_RTT_SHIFT; tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); tp->t_rttbest = tp->t_srtt + tp->t_rttvar; } TCPSTAT_INC(tcps_rttupdated); rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var); tp->t_rttupdated++; #ifdef NETFLIX_STATS stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt)); #endif tp->t_rxtshift = 0; /* * the retransmit should happen at rtt + 4 * rttvar. Because of the * way we do the smoothing, srtt and rttvar will each average +1/2 * tick of bias. When we compute the retransmit timer, we want 1/2 * tick of rounding and 1 extra tick because of +-1/2 tick * uncertainty in the firing of the timer. The bias will give us * exactly the 1.5 tick we need. But, because the bias is * statistical, we have to test that we don't drop below the minimum * feasible timer (which is 2 ticks). */ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), max(MSEC_2_TICKS(rack_rto_min), rtt + 2), MSEC_2_TICKS(rack_rto_max)); tp->t_softerror = 0; } static void rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm, uint32_t t, uint32_t cts) { /* * For this RSM, we acknowledged the data from a previous * transmission, not the last one we made. This means we did a false * retransmit. 
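 * This is the spurious-retransmit case the rack_badfr counters below account for; if the
 * retransmit in question is the one that put us into recovery, the cwnd and ssthresh
 * reductions are also unwound.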
*/ struct tcp_rack *rack; if (rsm->r_flags & RACK_HAS_FIN) { /* * The sending of the FIN often is multiple sent when we * have everything outstanding ack'd. We ignore this case * since its over now. */ return; } if (rsm->r_flags & RACK_TLP) { /* * We expect TLP's to have this occur. */ return; } rack = (struct tcp_rack *)tp->t_fb_ptr; /* should we undo cc changes and exit recovery? */ if (IN_RECOVERY(tp->t_flags)) { if (rack->r_ctl.rc_rsm_start == rsm->r_start) { /* * Undo what we ratched down and exit recovery if * possible */ EXIT_RECOVERY(tp->t_flags); tp->snd_recover = tp->snd_una; if (rack->r_ctl.rc_cwnd_at > tp->snd_cwnd) tp->snd_cwnd = rack->r_ctl.rc_cwnd_at; if (rack->r_ctl.rc_ssthresh_at > tp->snd_ssthresh) tp->snd_ssthresh = rack->r_ctl.rc_ssthresh_at; } } if (rsm->r_flags & RACK_WAS_SACKPASS) { /* * We retransmitted based on a sack and the earlier * retransmission ack'd it - re-ordering is occuring. */ counter_u64_add(rack_reorder_seen, 1); rack->r_ctl.rc_reorder_ts = cts; } counter_u64_add(rack_badfr, 1); counter_u64_add(rack_badfr_bytes, (rsm->r_end - rsm->r_start)); } static int rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type) { int32_t i; uint32_t t; if (rsm->r_flags & RACK_ACKED) /* Already done */ return (0); if ((rsm->r_rtr_cnt == 1) || ((ack_type == CUM_ACKED) && (to->to_flags & TOF_TS) && (to->to_tsecr) && (rsm->r_tim_lastsent[rsm->r_rtr_cnt - 1] == to->to_tsecr)) ) { /* * We will only find a matching timestamp if its cum-acked. * But if its only one retransmission its for-sure matching * :-) */ t = cts - rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; if ((int)t <= 0) t = 1; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; if (!rack->r_ctl.rc_rack_min_rtt || SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { rack->r_ctl.rc_rack_min_rtt = t; if (rack->r_ctl.rc_rack_min_rtt == 0) { rack->r_ctl.rc_rack_min_rtt = 1; } } tcp_rack_xmit_timer(rack, TCP_TS_TO_TICKS(t) + 1); if ((rsm->r_flags & RACK_TLP) && (!IN_RECOVERY(tp->t_flags))) { /* Segment was a TLP and our retrans matched */ if (rack->r_ctl.rc_tlp_cwnd_reduce) { rack->r_ctl.rc_rsm_start = tp->snd_max; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; rack_cong_signal(tp, NULL, CC_NDUPACK); /* * When we enter recovery we need to assure * we send one packet. */ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } else rack->r_ctl.rc_tlp_rtx_out = 0; } if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { /* New more recent rack_tmit_time */ rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; rack->rc_rack_rtt = t; } return (1); } /* * We clear the soft/rxtshift since we got an ack. * There is no assurance we will call the commit() function * so we need to clear these to avoid incorrect handling. */ tp->t_rxtshift = 0; tp->t_softerror = 0; if ((to->to_flags & TOF_TS) && (ack_type == CUM_ACKED) && (to->to_tsecr) && ((rsm->r_flags & (RACK_DEFERRED | RACK_OVERMAX)) == 0)) { /* * Now which timestamp does it match? In this block the ACK * must be coming from a previous transmission. 
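 * Every recorded transmit time for this rsm is scanned; a tsecr matching anything but the
 * most recent send means the ACK is for an earlier copy, and rack_earlier_retran() is told
 * about the likely spurious retransmit.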
*/ for (i = 0; i < rsm->r_rtr_cnt; i++) { if (rsm->r_tim_lastsent[i] == to->to_tsecr) { t = cts - rsm->r_tim_lastsent[i]; if ((int)t <= 0) t = 1; if ((i + 1) < rsm->r_rtr_cnt) { /* Likely */ rack_earlier_retran(tp, rsm, t, cts); } if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; if (!rack->r_ctl.rc_rack_min_rtt || SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { rack->r_ctl.rc_rack_min_rtt = t; if (rack->r_ctl.rc_rack_min_rtt == 0) { rack->r_ctl.rc_rack_min_rtt = 1; } } /* * Note the following calls to * tcp_rack_xmit_timer() are being commented * out for now. They give us no more accuracy * and often lead to a wrong choice. We have * enough samples that have not been * retransmitted. I leave the commented out * code in here in case in the future we * decide to add it back (though I can't forsee * doing that). That way we will easily see * where they need to be placed. */ if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { /* New more recent rack_tmit_time */ rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; rack->rc_rack_rtt = t; } return (1); } } goto ts_not_found; } else { /* * Ok its a SACK block that we retransmitted. or a windows * machine without timestamps. We can tell nothing from the * time-stamp since its not there or the time the peer last * recieved a segment that moved forward its cum-ack point. */ ts_not_found: i = rsm->r_rtr_cnt - 1; t = cts - rsm->r_tim_lastsent[i]; if ((int)t <= 0) t = 1; if (rack->r_ctl.rc_rack_min_rtt && SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { /* * We retransmitted and the ack came back in less * than the smallest rtt we have observed. We most * likey did an improper retransmit as outlined in * 4.2 Step 3 point 2 in the rack-draft. */ i = rsm->r_rtr_cnt - 2; t = cts - rsm->r_tim_lastsent[i]; rack_earlier_retran(tp, rsm, t, cts); } else if (rack->r_ctl.rc_rack_min_rtt) { /* * We retransmitted it and the retransmit did the * job. */ if (!rack->r_ctl.rc_rack_min_rtt || SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { rack->r_ctl.rc_rack_min_rtt = t; if (rack->r_ctl.rc_rack_min_rtt == 0) { rack->r_ctl.rc_rack_min_rtt = 1; } } if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[i])) { /* New more recent rack_tmit_time */ rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[i]; rack->rc_rack_rtt = t; } return (1); } } return (0); } /* * Mark the SACK_PASSED flag on all entries prior to rsm send wise. */ static void rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm) { struct rack_sendmap *nrsm; uint32_t ts; int32_t idx; idx = rsm->r_rtr_cnt - 1; ts = rsm->r_tim_lastsent[idx]; nrsm = rsm; TAILQ_FOREACH_REVERSE_FROM(nrsm, &rack->r_ctl.rc_tmap, rack_head, r_tnext) { if (nrsm == rsm) { /* Skip orginal segment he is acked */ continue; } if (nrsm->r_flags & RACK_ACKED) { /* Skip ack'd segments */ continue; } if (nrsm->r_flags & RACK_SACK_PASSED) { /* * We found one that is already marked * passed, we have been here before and * so all others below this are marked. */ break; } idx = nrsm->r_rtr_cnt - 1; if (ts == nrsm->r_tim_lastsent[idx]) { /* * For this case lets use seq no, if we sent in a * big block (TSO) we would have a bunch of segments * sent at the same time. * * We would only get a report if its SEQ is earlier. * If we have done multiple retransmits the times * would not be equal. 
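 * When the send times are equal we fall back to sequence order and
 * only mark segments that start before the newly sacked one.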
*/ if (SEQ_LT(nrsm->r_start, rsm->r_start)) { nrsm->r_flags |= RACK_SACK_PASSED; nrsm->r_flags &= ~RACK_WAS_SACKPASS; } } else { /* * Here they were sent at different times, not a big * block. Since we transmitted this one later and * see it sack'd then this must also be missing (or * we would have gotten a sack block for it) */ nrsm->r_flags |= RACK_SACK_PASSED; nrsm->r_flags &= ~RACK_WAS_SACKPASS; } } } static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts) { int32_t idx; int32_t times = 0; uint32_t start, end, changed = 0; struct rack_sendmap *rsm, *nrsm; int32_t used_ref = 1; start = sack->start; end = sack->end; rsm = *prsm; if (rsm && SEQ_LT(start, rsm->r_start)) { TAILQ_FOREACH_REVERSE_FROM(rsm, &rack->r_ctl.rc_map, rack_head, r_next) { if (SEQ_GEQ(start, rsm->r_start) && SEQ_LT(start, rsm->r_end)) { goto do_rest_ofb; } } } if (rsm == NULL) { start_at_beginning: rsm = NULL; used_ref = 0; } /* First lets locate the block where this guy is */ TAILQ_FOREACH_FROM(rsm, &rack->r_ctl.rc_map, r_next) { if (SEQ_GEQ(start, rsm->r_start) && SEQ_LT(start, rsm->r_end)) { break; } } do_rest_ofb: if (rsm == NULL) { /* * This happens when we get duplicate sack blocks with the * same end. For example SACK 4: 100 SACK 3: 100 The sort * will not change there location so we would just start at * the end of the first one and get lost. */ if (tp->t_flags & TF_SENTFIN) { /* * Check to see if we have not logged the FIN that * went out. */ nrsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_map, rack_sendmap, r_next); if (nrsm && (nrsm->r_end + 1) == tp->snd_max) { /* * Ok we did not get the FIN logged. */ nrsm->r_end++; rsm = nrsm; goto do_rest_ofb; } } if (times == 1) { #ifdef INVARIANTS panic("tp:%p rack:%p sack:%p to:%p prsm:%p", tp, rack, sack, to, prsm); #else goto out; #endif } times++; counter_u64_add(rack_sack_proc_restart, 1); goto start_at_beginning; } /* Ok we have an ACK for some piece of rsm */ if (rsm->r_start != start) { /* * Need to split this in two pieces the before and after. */ nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT); if (nrsm == NULL) { /* * failed XXXrrs what can we do but loose the sack * info? */ goto out; } nrsm->r_start = start; nrsm->r_rtr_bytes = 0; nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } rsm->r_end = nrsm->r_start; TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); rsm = nrsm; } if (SEQ_GEQ(end, rsm->r_end)) { /* * The end of this block is either beyond this guy or right * at this guy. */ if ((rsm->r_flags & RACK_ACKED) == 0) { rack_update_rtt(tp, rack, rsm, to, cts, SACKED); changed += (rsm->r_end - rsm->r_start); rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); rack_log_sack_passed(tp, rack, rsm); /* Is Reordering occuring? 
*/ if (rsm->r_flags & RACK_SACK_PASSED) { counter_u64_add(rack_reorder_seen, 1); rack->r_ctl.rc_reorder_ts = cts; } rsm->r_flags |= RACK_ACKED; rsm->r_flags &= ~RACK_TLP; if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 0; } } if (end == rsm->r_end) { /* This block only - done */ goto out; } /* There is more not coverend by this rsm move on */ start = rsm->r_end; nrsm = TAILQ_NEXT(rsm, r_next); rsm = nrsm; times = 0; goto do_rest_ofb; } /* Ok we need to split off this one at the tail */ nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT); if (nrsm == NULL) { /* failed rrs what can we do but loose the sack info? */ goto out; } /* Clone it */ nrsm->r_start = end; nrsm->r_end = rsm->r_end; nrsm->r_rtr_bytes = 0; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } /* The sack block does not cover this guy fully */ rsm->r_flags &= (~RACK_HAS_FIN); rsm->r_end = end; TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } if (rsm->r_flags & RACK_ACKED) { /* Been here done that */ goto out; } rack_update_rtt(tp, rack, rsm, to, cts, SACKED); changed += (rsm->r_end - rsm->r_start); rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); rack_log_sack_passed(tp, rack, rsm); /* Is Reordering occuring? */ if (rsm->r_flags & RACK_SACK_PASSED) { counter_u64_add(rack_reorder_seen, 1); rack->r_ctl.rc_reorder_ts = cts; } rsm->r_flags |= RACK_ACKED; rsm->r_flags &= ~RACK_TLP; if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 0; } out: if (rsm && (rsm->r_flags & RACK_ACKED)) { /* * Now can we merge this newly acked * block with either the previous or * next block? */ nrsm = TAILQ_NEXT(rsm, r_next); if (nrsm && (nrsm->r_flags & RACK_ACKED)) { /* yep this and next can be merged */ rsm = rack_merge_rsm(rack, rsm, nrsm); } /* Now what about the previous? */ nrsm = TAILQ_PREV(rsm, rack_head, r_next); if (nrsm && (nrsm->r_flags & RACK_ACKED)) { /* yep the previous and this can be merged */ rsm = rack_merge_rsm(rack, nrsm, rsm); } } if (used_ref == 0) { counter_u64_add(rack_sack_proc_all, 1); } else { counter_u64_add(rack_sack_proc_short, 1); } /* Save off where we last were */ if (rsm) rack->r_ctl.rc_sacklast = TAILQ_NEXT(rsm, r_next); else rack->r_ctl.rc_sacklast = NULL; *prsm = rsm; return (changed); } static void inline rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ack) { struct rack_sendmap *tmap; tmap = NULL; while (rsm && (rsm->r_flags & RACK_ACKED)) { /* Its no longer sacked, mark it so */ rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start); #ifdef INVARIANTS if (rsm->r_in_tmap) { panic("rack:%p rsm:%p flags:0x%x in tmap?", rack, rsm, rsm->r_flags); } #endif rsm->r_flags &= ~(RACK_ACKED|RACK_SACK_PASSED|RACK_WAS_SACKPASS); /* Rebuild it into our tmap */ if (tmap == NULL) { TAILQ_INSERT_HEAD(&rack->r_ctl.rc_tmap, rsm, r_tnext); tmap = rsm; } else { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, tmap, rsm, r_tnext); tmap = rsm; } tmap->r_in_tmap = 1; rsm = TAILQ_NEXT(rsm, r_next); } /* * Now lets possibly clear the sack filter so we start * recognizing sacks that cover this area. 
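 * Since the peer reneged, whatever the filter remembers above th_ack
 * may describe data the peer no longer acknowledges.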
*/ if (rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, th_ack); } static void rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th) { uint32_t changed, last_seq, entered_recovery = 0; struct tcp_rack *rack; struct rack_sendmap *rsm; struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1]; register uint32_t th_ack; int32_t i, j, k, num_sack_blks = 0; uint32_t cts, acked, ack_point, sack_changed = 0; INP_WLOCK_ASSERT(tp->t_inpcb); if (th->th_flags & TH_RST) { /* We don't log resets */ return; } rack = (struct tcp_rack *)tp->t_fb_ptr; cts = tcp_ts_getticks(); rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); changed = 0; th_ack = th->th_ack; if (SEQ_GT(th_ack, tp->snd_una)) { rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__); tp->t_acktime = ticks; } if (rsm && SEQ_GT(th_ack, rsm->r_start)) changed = th_ack - rsm->r_start; if (changed) { /* * The ACK point is advancing to th_ack, we must drop off * the packets in the rack log and calculate any eligble * RTT's. */ rack->r_wanted_output++; more: rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); if (rsm == NULL) { if ((th_ack - 1) == tp->iss) { /* * For the SYN incoming case we will not * have called tcp_output for the sending of * the SYN, so there will be no map. All * other cases should probably be a panic. */ goto proc_sack; } if (tp->t_flags & TF_SENTFIN) { /* if we send a FIN we will not hav a map */ goto proc_sack; } #ifdef INVARIANTS panic("No rack map tp:%p for th:%p state:%d rack:%p snd_una:%u snd_max:%u snd_nxt:%u chg:%d\n", tp, th, tp->t_state, rack, tp->snd_una, tp->snd_max, tp->snd_nxt, changed); #endif goto proc_sack; } if (SEQ_LT(th_ack, rsm->r_start)) { /* Huh map is missing this */ #ifdef INVARIANTS printf("Rack map starts at r_start:%u for th_ack:%u huh? ts:%d rs:%d\n", rsm->r_start, th_ack, tp->t_state, rack->r_state); #endif goto proc_sack; } rack_update_rtt(tp, rack, rsm, to, cts, CUM_ACKED); /* Now do we consume the whole thing? */ if (SEQ_GEQ(th_ack, rsm->r_end)) { /* Its all consumed. */ uint32_t left; rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; rsm->r_rtr_bytes = 0; TAILQ_REMOVE(&rack->r_ctl.rc_map, rsm, r_next); if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 0; } if (rack->r_ctl.rc_next == rsm) { /* scoot along the marker */ rack->r_ctl.rc_next = TAILQ_FIRST(&rack->r_ctl.rc_map); } if (rsm->r_flags & RACK_ACKED) { /* * It was acked on the scoreboard -- remove * it from total */ rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start); } else if (rsm->r_flags & RACK_SACK_PASSED) { /* * There are acked segments ACKED on the * scoreboard further up. We are seeing * reordering. */ counter_u64_add(rack_reorder_seen, 1); rsm->r_flags |= RACK_ACKED; rack->r_ctl.rc_reorder_ts = cts; } left = th_ack - rsm->r_end; if (rsm->r_rtr_cnt > 1) { /* * Technically we should make r_rtr_cnt be * monotonicly increasing and just mod it to * the timestamp it is replacing.. that way * we would have the last 3 retransmits. Now * rc_loss_count will be wrong if we * retransmit something more than 2 times in * recovery :( */ rack->r_ctl.rc_loss_count += (rsm->r_rtr_cnt - 1); } /* Free back to zone */ rack_free(rack, rsm); if (left) { goto more; } goto proc_sack; } if (rsm->r_flags & RACK_ACKED) { /* * It was acked on the scoreboard -- remove it from * total for the part being cum-acked. 
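 * Only the prefix below th_ack leaves rc_sacked; the rsm itself stays
 * on the map with r_start advanced to th_ack.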
*/ rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start); } rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; rsm->r_rtr_bytes = 0; rsm->r_start = th_ack; } proc_sack: /* Check for reneging */ rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); if (rsm && (rsm->r_flags & RACK_ACKED) && (th_ack == rsm->r_start)) { /* * The peer has moved snd_una up to * the edge of this send, i.e. one * that it had previously acked. The only * way that can be true if the peer threw * away data (space issues) that it had * previously sacked (else it would have * given us snd_una up to (rsm->r_end). * We need to undo the acked markings here. * * Note we have to look to make sure th_ack is * our rsm->r_start in case we get an old ack * where th_ack is behind snd_una. */ rack_peer_reneges(rack, rsm, th->th_ack); } if ((to->to_flags & TOF_SACK) == 0) { /* We are done nothing left to log */ goto out; } rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_map, rack_sendmap, r_next); if (rsm) { last_seq = rsm->r_end; } else { last_seq = tp->snd_max; } /* Sack block processing */ if (SEQ_GT(th_ack, tp->snd_una)) ack_point = th_ack; else ack_point = tp->snd_una; for (i = 0; i < to->to_nsacks; i++) { bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack)); sack.start = ntohl(sack.start); sack.end = ntohl(sack.end); if (SEQ_GT(sack.end, sack.start) && SEQ_GT(sack.start, ack_point) && SEQ_LT(sack.start, tp->snd_max) && SEQ_GT(sack.end, ack_point) && SEQ_LEQ(sack.end, tp->snd_max)) { if ((rack->r_ctl.rc_num_maps_alloced > rack_sack_block_limit) && (SEQ_LT(sack.end, last_seq)) && ((sack.end - sack.start) < (tp->t_maxseg / 8))) { /* * Not the last piece and its smaller than * 1/8th of a MSS. We ignore this. */ counter_u64_add(rack_runt_sacks, 1); continue; } sack_blocks[num_sack_blks] = sack; num_sack_blks++; } else if (SEQ_LEQ(sack.start, th_ack) && SEQ_LEQ(sack.end, th_ack)) { /* * Its a D-SACK block. */ /* tcp_record_dsack(sack.start, sack.end); */ } } if (num_sack_blks == 0) goto out; /* * Sort the SACK blocks so we can update the rack scoreboard with * just one pass. */ if (rack_use_sack_filter) { num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks, num_sack_blks, th->th_ack); ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks); } if (num_sack_blks < 2) { goto do_sack_work; } /* Sort the sacks */ for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { sack = sack_blocks[i]; sack_blocks[i] = sack_blocks[j]; sack_blocks[j] = sack; } } } /* * Now are any of the sack block ends the same (yes some * implememtations send these)? */ again: if (num_sack_blks > 1) { for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { if (sack_blocks[i].end == sack_blocks[j].end) { /* * Ok these two have the same end we * want the smallest end and then * throw away the larger and start * again. 
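 * For example [5,10] and [7,10] collapse to [5,10]; the duplicate is
 * dropped by sliding the remaining blocks down one slot.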
*/ if (SEQ_LT(sack_blocks[j].start, sack_blocks[i].start)) { /* * The second block covers * more area use that */ sack_blocks[i].start = sack_blocks[j].start; } /* * Now collapse out the dup-sack and * lower the count */ for (k = (j + 1); k < num_sack_blks; k++) { sack_blocks[j].start = sack_blocks[k].start; sack_blocks[j].end = sack_blocks[k].end; j++; } num_sack_blks--; goto again; } } } } do_sack_work: rsm = rack->r_ctl.rc_sacklast; for (i = 0; i < num_sack_blks; i++) { acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts); if (acked) { rack->r_wanted_output++; changed += acked; sack_changed += acked; } } out: if (changed) { /* Something changed cancel the rack timer */ rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); } if ((sack_changed) && (!IN_RECOVERY(tp->t_flags))) { /* * Ok we have a high probability that we need to go in to * recovery since we have data sack'd */ struct rack_sendmap *rsm; uint32_t tsused; tsused = tcp_ts_getticks(); rsm = tcp_rack_output(tp, rack, tsused); if (rsm) { /* Enter recovery */ rack->r_ctl.rc_rsm_start = rsm->r_start; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; entered_recovery = 1; rack_cong_signal(tp, NULL, CC_NDUPACK); /* * When we enter recovery we need to assure we send * one packet. */ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; rack->r_timer_override = 1; } } if (IN_RECOVERY(tp->t_flags) && (entered_recovery == 0)) { /* Deal with changed an PRR here (in recovery only) */ uint32_t pipe, snd_una; rack->r_ctl.rc_prr_delivered += changed; /* Compute prr_sndcnt */ if (SEQ_GT(tp->snd_una, th_ack)) { snd_una = tp->snd_una; } else { snd_una = th_ack; } pipe = ((tp->snd_max - snd_una) - rack->r_ctl.rc_sacked) + rack->r_ctl.rc_holes_rxt; if (pipe > tp->snd_ssthresh) { long sndcnt; sndcnt = rack->r_ctl.rc_prr_delivered * tp->snd_ssthresh; if (rack->r_ctl.rc_prr_recovery_fs > 0) sndcnt /= (long)rack->r_ctl.rc_prr_recovery_fs; else { rack->r_ctl.rc_prr_sndcnt = 0; sndcnt = 0; } sndcnt++; if (sndcnt > (long)rack->r_ctl.rc_prr_out) sndcnt -= rack->r_ctl.rc_prr_out; else sndcnt = 0; rack->r_ctl.rc_prr_sndcnt = sndcnt; } else { uint32_t limit; if (rack->r_ctl.rc_prr_delivered > rack->r_ctl.rc_prr_out) limit = (rack->r_ctl.rc_prr_delivered - rack->r_ctl.rc_prr_out); else limit = 0; if (changed > limit) limit = changed; limit += tp->t_maxseg; if (tp->snd_ssthresh > pipe) { rack->r_ctl.rc_prr_sndcnt = min((tp->snd_ssthresh - pipe), limit); } else { rack->r_ctl.rc_prr_sndcnt = min(0, limit); } } if (rack->r_ctl.rc_prr_sndcnt >= tp->t_maxseg) { rack->r_timer_override = 1; } } } /* * Return value of 1, we do not need to call rack_process_data(). * return value of 0, rack_process_data can be called. * For ret_val if its 0 the TCP is locked, if its non-zero * its unlocked and probably unsafe to touch the TCB. 
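 * *ofia, when non-NULL, is set if this ack also covered our FIN.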
*/ static int rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val) { int32_t ourfinisacked = 0; int32_t nsegs, acked_amount; int32_t acked; struct mbuf *mfree; struct tcp_rack *rack; int32_t recovery = 0; rack = (struct tcp_rack *)tp->t_fb_ptr; if (SEQ_GT(th->th_ack, tp->snd_max)) { rack_do_dropafterack(m, tp, th, thflags, tlen, ret_val); return (1); } if (SEQ_GEQ(th->th_ack, tp->snd_una) || to->to_nsacks) { rack_log_ack(tp, to, th); } if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { /* * Old ack, behind (or duplicate to) the last one rcv'd * Note: Should mark reordering is occuring! We should also * look for sack blocks arriving e.g. ack 1, 4-4 then ack 1, * 3-3, 4-4 would be reording. As well as ack 1, 3-3 ack 3 */ return (0); } /* * If we reach this point, ACK is not a duplicate, i.e., it ACKs * something we sent. */ if (tp->t_flags & TF_NEEDSYN) { /* * T/TCP: Connection was half-synchronized, and our SYN has * been ACK'd (so connection is now fully synchronized). Go * to non-starred state, increment snd_una for ACK of SYN, * and check if we can do window scaling. */ tp->t_flags &= ~TF_NEEDSYN; tp->snd_una++; /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; /* Send window already scaled. */ } } nsegs = max(1, m->m_pkthdr.lro_nsegs); INP_WLOCK_ASSERT(tp->t_inpcb); acked = BYTES_THIS_ACK(tp, th); TCPSTAT_ADD(tcps_rcvackpack, nsegs); TCPSTAT_ADD(tcps_rcvackbyte, acked); /* * If we just performed our first retransmit, and the ACK arrives * within our recovery window, then it was a mistake to do the * retransmit in the first place. Recover our original cwnd and * ssthresh, and proceed to transmit where we left off. */ if (tp->t_flags & TF_PREVVALID) { tp->t_flags &= ~TF_PREVVALID; if (tp->t_rxtshift == 1 && (int)(ticks - tp->t_badrxtwin) < 0) rack_cong_signal(tp, th, CC_RTO_ERR); } /* * If we have a timestamp reply, update smoothed round trip time. If * no timestamp is present but transmit timer is running and timed * sequence number was acked, update smoothed round trip time. Since * we now have an rtt measurement, cancel the timer backoff (cf., * Phil Karn's retransmit alg.). Recompute the initial retransmit * timer. * * Some boxes send broken timestamp replies during the SYN+ACK * phase, ignore timestamps of 0 or we could calculate a huge RTT * and blow up the retransmit timer. */ /* * If all outstanding data is acked, stop retransmit timer and * remember to restart (more output or persist). If there is more * data to be acked, restart retransmit timer, using current * (possibly backed-off) value. */ if (th->th_ack == tp->snd_max) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); rack->r_wanted_output++; } /* * If no data (only SYN) was ACK'd, skip rest of ACK processing. */ if (acked == 0) { if (ofia) *ofia = ourfinisacked; return (0); } if (rack->r_ctl.rc_early_recovery) { if (IN_RECOVERY(tp->t_flags)) { if (SEQ_LT(th->th_ack, tp->snd_recover) && (SEQ_LT(th->th_ack, tp->snd_max))) { tcp_rack_partialack(tp, th); } else { rack_post_recovery(tp, th); recovery = 1; } } } /* * Let the congestion control algorithm update congestion control * related information. This typically means increasing the * congestion window. 
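 * recovery is non-zero only when rack_post_recovery() was just run for
 * this ack in the early-recovery path above.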
*/ rack_ack_received(tp, rack, th, nsegs, CC_ACK, recovery); SOCKBUF_LOCK(&so->so_snd); acked_amount = min(acked, (int)sbavail(&so->so_snd)); tp->snd_wnd -= acked_amount; mfree = sbcut_locked(&so->so_snd, acked_amount); if ((sbused(&so->so_snd) == 0) && (acked > acked_amount) && (tp->t_state >= TCPS_FIN_WAIT_1)) { ourfinisacked = 1; } /* NB: sowwakeup_locked() does an implicit unlock. */ sowwakeup_locked(so); m_freem(mfree); if (rack->r_ctl.rc_early_recovery == 0) { if (IN_RECOVERY(tp->t_flags)) { if (SEQ_LT(th->th_ack, tp->snd_recover) && (SEQ_LT(th->th_ack, tp->snd_max))) { tcp_rack_partialack(tp, th); } else { rack_post_recovery(tp, th); } } } tp->snd_una = th->th_ack; if (SEQ_GT(tp->snd_una, tp->snd_recover)) tp->snd_recover = tp->snd_una; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) { tp->snd_nxt = tp->snd_una; } if (tp->snd_una == tp->snd_max) { /* Nothing left outstanding */ rack_log_progress_event(rack, tp, 0, PROGRESS_CLEAR, __LINE__); tp->t_acktime = 0; rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); /* Set need output so persist might get set */ rack->r_wanted_output++; if (rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); if ((tp->t_state >= TCPS_FIN_WAIT_1) && (sbavail(&so->so_snd) == 0) && (tp->t_flags2 & TF2_DROP_AF_DATA)) { /* * The socket was gone and the * peer sent data, time to * reset him. */ *ret_val = 1; tp = tcp_close(tp); rack_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, tlen); return (1); } } if (ofia) *ofia = ourfinisacked; return (0); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { /* * Update window information. Don't look at window if no ACK: TAC's * send garbage on first SYN. */ int32_t nsegs; #ifdef TCP_RFC7413 int32_t tfo_syn; #else #define tfo_syn (FALSE) #endif struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; INP_WLOCK_ASSERT(tp->t_inpcb); nsegs = max(1, m->m_pkthdr.lro_nsegs); if ((thflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) || (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ if (tlen == 0 && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; rack->r_wanted_output++; } else if (thflags & TH_ACK) { if ((tp->snd_wl2 == th->th_ack) && (tiwin < tp->snd_wnd)) { tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; } } /* Was persist timer active and now we have window space? */ if ((rack->rc_in_persist != 0) && tp->snd_wnd) { rack_exit_persist(tp, rack); tp->snd_nxt = tp->snd_max; /* Make sure we output to start the timer */ rack->r_wanted_output++; } if (tp->t_flags2 & TF2_DROP_AF_DATA) { m_freem(m); return (0); } /* * Process segments with URG. */ if ((thflags & TH_URG) && th->th_urp && TCPS_HAVERCVDFIN(tp->t_state) == 0) { /* * This is a kludge, but if we receive and accept random * urgent pointers, we'll crash in soreceive. It's hard to * imagine someone actually wanting to send this much urgent * data. 
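 * So if the urgent offset plus what is already queued would exceed
 * sb_max we simply strip the URG and fall through to normal data
 * processing.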
*/ SOCKBUF_LOCK(&so->so_rcv); if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ goto dodata; /* XXX */ } /* * If this segment advances the known urgent pointer, then * mark the data stream. This should not happen in * CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since a * FIN has been received from the remote side. In these * states we ignore the URG. * * According to RFC961 (Assigned Protocols), the urgent * pointer points to the last octet of urgent data. We * continue, however, to consider it to indicate the first * octet of data past the urgent section as the original * spec states (in one of two places). */ if (SEQ_GT(th->th_seq + th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; so->so_oobmark = sbavail(&so->so_rcv) + (tp->rcv_up - tp->rcv_nxt) - 1; if (so->so_oobmark == 0) so->so_rcv.sb_state |= SBS_RCVATMARK; sohasoutofband(so); tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); } SOCKBUF_UNLOCK(&so->so_rcv); /* * Remove out of band data so doesn't get presented to user. * This can happen independent of advancing the URG pointer, * but if two URG's are pending at once, some out-of-band * data may creep in... ick. */ if (th->th_urp <= (uint32_t) tlen && !(so->so_options & SO_OOBINLINE)) { /* hdr drop is delayed */ tcp_pulloutofband(so, th, m, drop_hdrlen); } } else { /* * If no out of band data is expected, pull receive urgent * pointer along with the receive window. */ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ INP_WLOCK_ASSERT(tp->t_inpcb); /* * Process the segment text, merging it into the TCP sequencing * queue, and arranging for acknowledgment of receipt if necessary. * This process logically involves adjusting tp->rcv_wnd as data is * presented to the user (this happens in tcp_usrreq.c, case * PRU_RCVD). If a FIN has already been received on this connection * then we just ignore the text. */ #ifdef TCP_RFC7413 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_flags & TF_FASTOPEN)); #endif if ((tlen || (thflags & TH_FIN) || tfo_syn) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; tcp_seq save_rnxt = tp->rcv_nxt; int save_tlen = tlen; m_adj(m, drop_hdrlen); /* delayed header drop */ /* * Insert segment which includes th into TCP reassembly * queue with control block tp. Set thflags to whether * reassembly now includes a segment with FIN. This handles * the common case inline (segment is the next to be * received on an established connection, and the queue is * empty), avoiding linkage into and removal from the queue * and repetition of various conversions. Set DELACK for * segments received in order, but ack immediately when * segments are out of order (so fast retransmit can work). */ if (th->th_seq == tp->rcv_nxt && SEGQ_EMPTY(tp) && (TCPS_HAVEESTABLISHED(tp->t_state) || tfo_syn)) { if (DELAY_ACK(tp, tlen) || tfo_syn) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { rack->r_wanted_output++; tp->t_flags |= TF_ACKNOW; } tp->rcv_nxt += tlen; thflags = th->th_flags & TH_FIN; TCPSTAT_ADD(tcps_rcvpack, nsegs); TCPSTAT_ADD(tcps_rcvbyte, tlen); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(m); else sbappendstream_locked(&so->so_rcv, m, 0); /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); } else { /* * XXX: Due to the header drop above "th" is * theoretically invalid by now. 
Fortunately * m_adj() doesn't actually frees any mbufs when * trimming from the head. */ tcp_seq temp = save_start; thflags = tcp_reass(tp, th, &temp, &tlen, m); tp->t_flags |= TF_ACKNOW; } if ((tp->t_flags & TF_SACK_PERMIT) && (save_tlen > 0)) { if ((tlen == 0) && (SEQ_LT(save_start, save_rnxt))) { /* * DSACK actually handled in the fastpath * above. */ tcp_update_sack_list(tp, save_start, save_start + save_tlen); } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) { if ((tp->rcv_numsacks >= 1) && (tp->sackblks[0].end == save_start)) { /* * Partial overlap, recorded at todrop * above. */ tcp_update_sack_list(tp, tp->sackblks[0].start, tp->sackblks[0].end); } else { tcp_update_dsack_list(tp, save_start, save_start + save_tlen); } } else if (tlen >= save_tlen) { /* Update of sackblks. */ tcp_update_dsack_list(tp, save_start, save_start + save_tlen); } else if (tlen > 0) { tcp_update_dsack_list(tp, save_start, save_start + tlen); } } } else { m_freem(m); thflags &= ~TH_FIN; } /* * If FIN is received ACK the FIN and let the user know that the * connection is closing. */ if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); /* * If connection is half-synchronized (ie NEEDSYN * flag on) then delay ACK, so it may be piggybacked * when SYN is sent. Otherwise, since we received a * FIN then no more input can be expected, send ACK * now. */ if (tp->t_flags & TF_NEEDSYN) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; } tp->rcv_nxt++; } switch (tp->t_state) { /* * In SYN_RECEIVED and ESTABLISHED STATES enter the * CLOSE_WAIT state. */ case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tcp_state_change(tp, TCPS_CLOSE_WAIT); break; /* * If still in FIN_WAIT_1 STATE FIN has not been * acked so enter the CLOSING state. */ case TCPS_FIN_WAIT_1: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tcp_state_change(tp, TCPS_CLOSING); break; /* * In FIN_WAIT_2 state enter the TIME_WAIT state, * starting the time-wait timer, turning off the * other standard timers. */ case TCPS_FIN_WAIT_2: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); return (1); } } /* * Return any desired output. */ if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) { rack->r_wanted_output++; } INP_WLOCK_ASSERT(tp->t_inpcb); return (0); } /* * Here nothing is really faster, its just that we * have broken out the fast-data path also just like * the fast-ack. */ static int rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t nxt_pkt) { int32_t nsegs; int32_t newsize = 0; /* automatic sockbuf scaling */ struct tcp_rack *rack; #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif /* * If last ACK falls within this segment's sequence numbers, record * the timestamp. NOTE that the test is modified according to the * latest proposal of the tcplw@cray.com list (Braden 1993/04/26). 
*/ if (__predict_false(th->th_seq != tp->rcv_nxt)) { return (0); } if (__predict_false(tp->snd_nxt != tp->snd_max)) { return (0); } if (tiwin && tiwin != tp->snd_wnd) { return (0); } if (__predict_false((tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN)))) { return (0); } if (__predict_false((to->to_flags & TOF_TS) && (TSTMP_LT(to->to_tsval, tp->ts_recent)))) { return (0); } if (__predict_false((th->th_ack != tp->snd_una))) { return (0); } if (__predict_false(tlen > sbspace(&so->so_rcv))) { return (0); } if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } rack = (struct tcp_rack *)tp->t_fb_ptr; /* * This is a pure, in-sequence data packet with nothing on the * reassembly queue and we have enough buffer space to take it. */ nsegs = max(1, m->m_pkthdr.lro_nsegs); /* Clean receiver SACK report if present */ if (tp->rcv_numsacks) tcp_clean_sackreport(tp); TCPSTAT_INC(tcps_preddat); tp->rcv_nxt += tlen; /* * Pull snd_wl1 up to prevent seq wrap relative to th_seq. */ tp->snd_wl1 = th->th_seq; /* * Pull rcv_up up to prevent seq wrap relative to rcv_nxt. */ tp->rcv_up = tp->rcv_nxt; TCPSTAT_ADD(tcps_rcvpack, nsegs); TCPSTAT_ADD(tcps_rcvbyte, tlen); #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif newsize = tcp_autorcvbuf(m, th, so, tp, tlen); /* Add data to socket buffer. */ SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { /* * Set new socket buffer size. Give up when limit is * reached. */ if (newsize) if (!sbreserve_locked(&so->so_rcv, newsize, so, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m, 0); rack_calc_rwin(so, tp); } /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); if (DELAY_ACK(tp, tlen)) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; rack->r_wanted_output++; } if ((tp->snd_una == tp->snd_max) && rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); return (1); } /* * This subfunction is used to try to highly optimize the * fast path. We again allow window updates that are * in sequence to remain in the fast-path. We also add * in the __predict's to attempt to help the compiler. * Note that if we return a 0, then we can *not* process * it and the caller should push the packet into the * slow-path. */ static int rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t nxt_pkt, uint32_t cts) { int32_t acked; int32_t nsegs; #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif struct tcp_rack *rack; if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { /* Old ack, behind (or duplicate to) the last one rcv'd */ return (0); } if (__predict_false(SEQ_GT(th->th_ack, tp->snd_max))) { /* Above what we have sent? */ return (0); } if (__predict_false(tp->snd_nxt != tp->snd_max)) { /* We are retransmitting */ return (0); } if (__predict_false(tiwin == 0)) { /* zero window */ return (0); } if (__predict_false(tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN))) { /* We need a SYN or a FIN, unlikely.. 
*/ return (0); } if ((to->to_flags & TOF_TS) && __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) { /* Timestamp is behind .. old ack with seq wrap? */ return (0); } if (__predict_false(IN_RECOVERY(tp->t_flags))) { /* Still recovering */ return (0); } rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->r_ctl.rc_sacked) { /* We have sack holes on our scoreboard */ return (0); } /* Ok if we reach here, we can process a fast-ack */ nsegs = max(1, m->m_pkthdr.lro_nsegs); rack_log_ack(tp, to, th); /* Did the window get updated? */ if (tiwin != tp->snd_wnd) { tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; } if ((rack->rc_in_persist != 0) && (tp->snd_wnd >= tp->t_maxseg)) { rack_exit_persist(tp, rack); } /* * If last ACK falls within this segment's sequence numbers, record * the timestamp. NOTE that the test is modified according to the * latest proposal of the tcplw@cray.com list (Braden 1993/04/26). */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * This is a pure ack for outstanding data. */ TCPSTAT_INC(tcps_predack); /* * "bad retransmit" recovery. */ if (tp->t_flags & TF_PREVVALID) { tp->t_flags &= ~TF_PREVVALID; if (tp->t_rxtshift == 1 && (int)(ticks - tp->t_badrxtwin) < 0) rack_cong_signal(tp, th, CC_RTO_ERR); } /* * Recalculate the transmit timer / rtt. * * Some boxes send broken timestamp replies during the SYN+ACK * phase, ignore timestamps of 0 or we could calculate a huge RTT * and blow up the retransmit timer. */ acked = BYTES_THIS_ACK(tp, th); #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, to); #endif TCPSTAT_ADD(tcps_rcvackpack, nsegs); TCPSTAT_ADD(tcps_rcvackbyte, acked); sbdrop(&so->so_snd, acked); /* * Let the congestion control algorithm update congestion control * related information. This typically means increasing the * congestion window. */ rack_ack_received(tp, rack, th, nsegs, CC_ACK, 0); tp->snd_una = th->th_ack; /* * Pull snd_wl2 up to prevent seq wrap relative to th_ack. */ tp->snd_wl2 = th->th_ack; tp->t_dupacks = 0; m_freem(m); /* ND6_HINT(tp); *//* Some progress has been made. */ /* * If all outstanding data are acked, stop retransmit timer, * otherwise restart timer using current (possibly backed-off) * value. If process is waiting for space, wakeup/selwakeup/signal. * If data are ready to send, let tcp_output decide between more * output or persist. */ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif if (tp->snd_una == tp->snd_max) { rack_log_progress_event(rack, tp, 0, PROGRESS_CLEAR, __LINE__); tp->t_acktime = 0; rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); } /* Wake up the socket if we have room to write more */ sowwakeup(so); if (sbavail(&so->so_snd)) { rack->r_wanted_output++; } return (1); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t todrop; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); /* * If the state is SYN_SENT: if seg contains an ACK, but not for our * SYN, drop the input. if seg contains a RST, then drop the * connection. 
if seg does not contain SYN, then drop it. Otherwise * this is an acceptable SYN segment initialize tp->rcv_nxt and * tp->irs if seg contains ack then advance tp->snd_una if seg * contains an ECE and ECN support is enabled, the stream is ECN * capable. if SYN has been acked change to ESTABLISHED else * SYN_RCVD state arrange for segment to be acked (eventually) * continue processing rest of data/controls, beginning with URG */ if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { TCP_PROBE5(connect__refused, NULL, tp, mtod(m, const char *), tp, th); tp = tcp_drop(tp, ECONNREFUSED); rack_do_drop(m, tp); return (1); } if (thflags & TH_RST) { rack_do_drop(m, tp); return (1); } if (!(thflags & TH_SYN)) { rack_do_drop(m, tp); return (1); } tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; } tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale); /* * If there's data, delay ACK; if there's also a FIN ACKNOW * will be turned on later. */ if (DELAY_ACK(tp, tlen) && tlen != 0) { rack_timer_cancel(tp, (struct tcp_rack *)tp->t_fb_ptr, ((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output++; tp->t_flags |= TF_ACKNOW; } if ((thflags & TH_ECE) && V_tcp_do_ecn) { tp->t_flags |= TF_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } /* * Received in SYN_SENT[*] state. Transitions: * SYN_SENT --> ESTABLISHED SYN_SENT* --> FIN_WAIT_1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; thflags &= ~TH_SYN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(connect__established, NULL, tp, mtod(m, const char *), tp, th); cc_conn_init(tp); } } else { /* * Received initial SYN in SYN-SENT[*] state => simultaneous * open. If segment contains CC option and there is a * cached CC, apply TAO test. If it succeeds, connection is * * half-synchronized. Otherwise, do 3-way handshake: * SYN-SENT -> SYN-RECEIVED SYN-SENT* -> SYN-RECEIVED* If * there was no CC option, clear cached CC value. */ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_state_change(tp, TCPS_SYN_RECEIVED); } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* * Advance th->th_seq to correspond to first data byte. If data, * trim to stay within window, dropping FIN if necessary. */ th->th_seq++; if (tlen > tp->rcv_wnd) { todrop = tlen - tp->rcv_wnd; m_adj(m, -todrop); tlen = tp->rcv_wnd; thflags &= ~TH_FIN; TCPSTAT_INC(tcps_rcvpackafterwin); TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); } tp->snd_wl1 = th->th_seq - 1; tp->rcv_up = th->th_seq; /* * Client side of transaction: already sent SYN and data. If the * remote host used T/TCP to validate the SYN, our data will be * ACK'd; if so, enter normal data segment processing in the middle * of step 5, ack processing. Otherwise, goto step 6. 
*/ if (thflags & TH_ACK) { if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) return (ret_val); /* We may have changed to FIN_WAIT_1 above */ if (tp->t_state == TCPS_FIN_WAIT_1) { /* * In FIN_WAIT_1 STATE in addition to the processing * for the ESTABLISHED state if our FIN is now * acknowledged then enter FIN_WAIT_2. */ if (ourfinisacked) { /* * If we can't receive any more data, then * closing user can proceed. Starting the * timer is contrary to the specification, * but if we don't get a FIN we'll hang * forever. * * XXXjl: we should release the tp also, and * use a compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } #ifdef TCP_RFC7413 if (tp->t_flags & TF_FASTOPEN) { /* * When a TFO connection is in SYN_RECEIVED, the only valid * packets are the initial SYN, a retransmit/copy of the * initial SYN (possibly with a subset of the original * data), a valid ACK, a FIN, or a RST. */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_RXT) || (rack->r_ctl.rc_hpts_flags & PACE_TMR_TLP) || (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK)) { rack_do_drop(m, NULL); return (0); } } else if (!(thflags & (TH_ACK | TH_FIN | TH_RST))) { rack_do_drop(m, NULL); return (0); } } #endif if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } /* * In the SYN-RECEIVED state, validate that the packet belongs to * this connection before trimming the data to fit the receive * window. Check the sequence number versus IRS since we know the * sequence numbers haven't wrapped. This is a partial fix for the * "LAND" DoS attack. */ if (SEQ_LT(th->th_seq, tp->irs)) { rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 
2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { #ifdef TCP_RFC7413 if (tp->t_flags & TF_FASTOPEN) { tp->snd_wnd = tiwin; cc_conn_init(tp); } #endif return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } TCPSTAT_INC(tcps_connects); soisconnected(so); /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_wnd = tiwin; } /* * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* -> * FIN-WAIT-1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(accept__established, NULL, tp, mtod(m, const char *), tp, th); #ifdef TCP_RFC7413 if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; /* * Account for the ACK of our SYN prior to regular * ACK processing below. */ tp->snd_una++; } /* * TFO connections call cc_conn_init() during SYN * processing. Calling it again here for such connections * is not harmless as it would undo the snd_cwnd reduction * that occurs when a TFO SYN|ACK is retransmitted. */ if (!(tp->t_flags & TF_FASTOPEN)) #endif cc_conn_init(tp); } /* * If segment contains data or ACK, will call tcp_reass() later; if * not, do so now to pass queued data to user. */ if (tlen == 0 && (thflags & TH_FIN) == 0) (void)tcp_reass(tp, (struct tcphdr *)0, NULL, 0, (struct mbuf *)0); tp->snd_wl1 = th->th_seq - 1; if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (tp->t_state == TCPS_FIN_WAIT_1) { /* We could have went to FIN_WAIT_1 (or EST) above */ /* * In FIN_WAIT_1 STATE in addition to the processing for the * ESTABLISHED state if our FIN is now acknowledged then * enter FIN_WAIT_2. */ if (ourfinisacked) { /* * If we can't receive any more data, then closing * user can proceed. Starting the timer is contrary * to the specification, but if we don't get a FIN * we'll hang forever. * * XXXjl: we should release the tp also, and use a * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. 
*/ static int rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; /* * Header prediction: check for the two common cases of a * uni-directional data xfer. If the packet has no control flags, * is in-sequence, the window didn't change and we're not * retransmitting, it's a candidate. If the length is zero and the * ack moved forward, we're the sender side of the xfer. Just free * the data acked & wake any higher level process that was blocked * waiting for space. If the length is non-zero and the ack didn't * move, we're the receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data toc The socket * buffer and note that we need a delayed ack. Make sure that the * hidden state-flags are also off. Since we check for * TCPS_ESTABLISHED first, it can only be TH_NEEDSYN. */ if (__predict_true(((to->to_flags & TOF_SACK) == 0)) && __predict_true((thflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK) && __predict_true(SEGQ_EMPTY(tp)) && __predict_true(th->th_seq == tp->rcv_nxt)) { struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; if (tlen == 0) { if (rack_fastack(m, th, so, tp, to, drop_hdrlen, tlen, tiwin, nxt_pkt, rack->r_ctl.rc_rcvtime)) { return (0); } } else { if (rack_do_fastnewdata(m, th, so, tp, to, drop_hdrlen, tlen, tiwin, nxt_pkt)) { return (0); } } } rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. 
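 * Queueing here means handing the segment to rack_process_data();
 * otherwise the un-ACKed segment goes to rack_do_dropafterack() when
 * we owe an ack, or is simply dropped.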
*/ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } /* State changes only happen in rack_process_data() */ return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL); return (0); } } /* * Ack processing. 
*/ if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } static int rack_check_data_after_close(struct mbuf *m, struct tcpcb *tp, int32_t *tlen, struct tcphdr *th, struct socket *so) { struct tcp_rack *rack; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->rc_allow_data_af_clo == 0) { close_now: tp = tcp_close(tp); TCPSTAT_INC(tcps_rcvafterclose); rack_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, (*tlen)); return (1); } if (sbavail(&so->so_snd) == 0) goto close_now; /* Ok we allow data that is ignored and a followup reset */ tp->rcv_nxt = th->th_seq + *tlen; tp->t_flags2 |= TF2_DROP_AF_DATA; rack->r_wanted_output = 1; *tlen = 0; return (0); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. 
*/ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { /* * If we can't receive any more data, then closing user can * proceed. Starting the timer is contrary to the * specification, but if we don't get a FIN we'll hang * forever. * * XXXjl: we should release the tp also, and use a * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. 
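 * In short: ts_recent is only updated when the last ACK we sent falls
 * within this segment's sequence span, with a SYN or FIN counting as one
 * extra sequence unit.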
*/ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); m_freem(m); return (1); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. 
*/ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL); return (0); } } /* * case TCPS_LAST_ACK: Ack processing. */ if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); rack_do_drop(m, tp); return (1); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); /* Reset receive buffer auto scaling when not in bulk receive mode. */ if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. 
 */
    if ((to->to_flags & TOF_TS) != 0 &&
        SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
        SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
        ((thflags & (TH_SYN | TH_FIN)) != 0))) {
        tp->ts_recent_age = tcp_ts_getticks();
        tp->ts_recent = to->to_tsval;
    }
    /*
     * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
     * is on (half-synchronized state), then queue data for later
     * processing; else drop segment and return.
     */
    if ((thflags & TH_ACK) == 0) {
        if (tp->t_flags & TF_NEEDSYN) {
            return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen,
                tiwin, thflags, nxt_pkt));
        } else if (tp->t_flags & TF_ACKNOW) {
            rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val);
            return (ret_val);
        } else {
            rack_do_drop(m, NULL);
            return (0);
        }
    }
    /*
     * Ack processing.
     */
    if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked,
        thflags, &ret_val)) {
        return (ret_val);
    }
    if (sbavail(&so->so_snd)) {
        if (rack_progress_timeout_check(tp)) {
            tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT);
            rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
            return (1);
        }
    }
    return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, tiwin,
        thflags, nxt_pkt));
}

static void inline
rack_clear_rate_sample(struct tcp_rack *rack)
{
    rack->r_ctl.rack_rs.rs_flags = RACK_RTT_EMPTY;
    rack->r_ctl.rack_rs.rs_rtt_cnt = 0;
    rack->r_ctl.rack_rs.rs_rtt_tot = 0;
}

static int
rack_init(struct tcpcb *tp)
{
    struct tcp_rack *rack = NULL;

    tp->t_fb_ptr = uma_zalloc(rack_pcb_zone, M_NOWAIT);
    if (tp->t_fb_ptr == NULL) {
        /*
         * We need to allocate memory here but can't: the INP and
         * INP_INFO locks are already held and they are recursive
         * (this happens during setup), so a scheme that drops the
         * locks around the allocation does not work. :(
         */
        return (ENOMEM);
    }
    memset(tp->t_fb_ptr, 0, sizeof(struct tcp_rack));

    rack = (struct tcp_rack *)tp->t_fb_ptr;
    TAILQ_INIT(&rack->r_ctl.rc_map);
    TAILQ_INIT(&rack->r_ctl.rc_free);
    TAILQ_INIT(&rack->r_ctl.rc_tmap);
    rack->rc_tp = tp;
    if (tp->t_inpcb) {
        rack->rc_inp = tp->t_inpcb;
    }
    /* Probably not needed but let's be sure */
    rack_clear_rate_sample(rack);
    rack->r_cpu = 0;
    rack->r_ctl.rc_reorder_fade = rack_reorder_fade;
    rack->rc_allow_data_af_clo = rack_ignore_data_after_close;
    rack->r_ctl.rc_tlp_threshold = rack_tlp_thresh;
    rack->rc_pace_reduce = rack_slot_reduction;
    if (V_tcp_delack_enabled)
        tp->t_delayed_ack = 1;
    else
        tp->t_delayed_ack = 0;
    rack->rc_pace_max_segs = rack_hptsi_segments;
    rack->r_ctl.rc_early_recovery_segs = rack_early_recovery_max_seg;
    rack->r_ctl.rc_reorder_shift = rack_reorder_thresh;
    rack->r_ctl.rc_pkt_delay = rack_pkt_delay;
    rack->r_ctl.rc_prop_reduce = rack_use_proportional_reduce;
    rack->r_idle_reduce_largest = rack_reduce_largest_on_idle;
    rack->r_enforce_min_pace = rack_min_pace_time;
    rack->r_min_pace_seg_thresh = rack_min_pace_time_seg_req;
    rack->r_ctl.rc_prop_rate = rack_proportional_rate;
    rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp;
    rack->r_ctl.rc_early_recovery = rack_early_recovery;
    rack->rc_always_pace = rack_pace_every_seg;
    rack->r_ctl.rc_rate_sample_method = rack_rate_sample_method;
    rack->rack_tlp_threshold_use = rack_tlp_threshold_use;
    rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr;
    rack->r_ctl.rc_min_to = rack_min_to;
    rack->r_ctl.rc_prr_inc_var = rack_inc_var;
    if (tp->snd_una != tp->snd_max) {
        /* Create a send map for the current outstanding data */
        struct rack_sendmap *rsm;

        rsm = rack_alloc(rack);
        if (rsm == NULL) {
            uma_zfree(rack_pcb_zone, tp->t_fb_ptr);
            tp->t_fb_ptr = NULL;
            return (ENOMEM);
        }
        rsm->r_flags = RACK_OVERMAX;
        rsm->r_tim_lastsent[0] = tcp_ts_getticks();
        rsm->r_rtr_cnt = 1;
        rsm->r_rtr_bytes = 0;
        rsm->r_start = tp->snd_una;
        rsm->r_end = tp->snd_max;
        rsm->r_sndcnt = 0;
        TAILQ_INSERT_TAIL(&rack->r_ctl.rc_map, rsm, r_next);
        TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
        rsm->r_in_tmap = 1;
    }
    rack_stop_all_timers(tp);
    rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0);
    return (0);
}

static int
rack_handoff_ok(struct tcpcb *tp)
{
    if ((tp->t_state == TCPS_CLOSED) ||
        (tp->t_state == TCPS_LISTEN)) {
        /* Sure, no problem, though it may not stick */
        return (0);
    }
    if ((tp->t_state == TCPS_SYN_SENT) ||
        (tp->t_state == TCPS_SYN_RECEIVED)) {
        /*
         * We really can't tell yet; you have to get to ESTAB
         * or beyond to know.
         */
        return (EAGAIN);
    }
    if (tp->t_flags & TF_SACK_PERMIT) {
        return (0);
    }
    /*
     * If we reach here we don't do SACK on this connection so we can
     * never do rack.
     */
    return (EINVAL);
}

static void
rack_fini(struct tcpcb *tp, int32_t tcb_is_purged)
{
    if (tp->t_fb_ptr) {
        struct tcp_rack *rack;
        struct rack_sendmap *rsm;

        rack = (struct tcp_rack *)tp->t_fb_ptr;
#ifdef TCP_BLACKBOX
        tcp_log_flowend(tp);
#endif
        rsm = TAILQ_FIRST(&rack->r_ctl.rc_map);
        while (rsm) {
            TAILQ_REMOVE(&rack->r_ctl.rc_map, rsm, r_next);
            uma_zfree(rack_zone, rsm);
            rsm = TAILQ_FIRST(&rack->r_ctl.rc_map);
        }
        rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
        while (rsm) {
            TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_next);
            uma_zfree(rack_zone, rsm);
            rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
        }
        rack->rc_free_cnt = 0;
        uma_zfree(rack_pcb_zone, tp->t_fb_ptr);
        tp->t_fb_ptr = NULL;
    }
    /* Make sure snd_nxt is correctly set */
    tp->snd_nxt = tp->snd_max;
}

static void
rack_set_state(struct tcpcb *tp, struct tcp_rack *rack)
{
    switch (tp->t_state) {
    case TCPS_SYN_SENT:
        rack->r_state = TCPS_SYN_SENT;
        rack->r_substate = rack_do_syn_sent;
        break;
    case TCPS_SYN_RECEIVED:
        rack->r_state = TCPS_SYN_RECEIVED;
        rack->r_substate = rack_do_syn_recv;
        break;
    case TCPS_ESTABLISHED:
        rack->r_state = TCPS_ESTABLISHED;
        rack->r_substate = rack_do_established;
        break;
    case TCPS_CLOSE_WAIT:
        rack->r_state = TCPS_CLOSE_WAIT;
        rack->r_substate = rack_do_close_wait;
        break;
    case TCPS_FIN_WAIT_1:
        rack->r_state = TCPS_FIN_WAIT_1;
        rack->r_substate = rack_do_fin_wait_1;
        break;
    case TCPS_CLOSING:
        rack->r_state = TCPS_CLOSING;
        rack->r_substate = rack_do_closing;
        break;
    case TCPS_LAST_ACK:
        rack->r_state = TCPS_LAST_ACK;
        rack->r_substate = rack_do_lastack;
        break;
    case TCPS_FIN_WAIT_2:
        rack->r_state = TCPS_FIN_WAIT_2;
        rack->r_substate = rack_do_fin_wait_2;
        break;
    case TCPS_LISTEN:
    case TCPS_CLOSED:
    case TCPS_TIME_WAIT:
    default:
        break;
    };
}

static void
rack_timer_audit(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb)
{
    /*
     * We received an ack, and then did not call send or were bounced
     * out because the hpts was running. Now a timer is up as well, is
     * it the right timer?
     */
    struct rack_sendmap *rsm;
    int tmr_up;

    tmr_up = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK;
    if (rack->rc_in_persist && (tmr_up == PACE_TMR_PERSIT))
        return;
    rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
    if (((rsm == NULL) || (tp->t_state < TCPS_ESTABLISHED)) &&
        (tmr_up == PACE_TMR_RXT)) {
        /* Should be an RXT */
        return;
    }
    if (rsm == NULL) {
        /* Nothing outstanding? */
        if (tp->t_flags & TF_DELACK) {
            if (tmr_up == PACE_TMR_DELACK)
                /* We are supposed to have delayed ack up and we do */
                return;
        } else if (sbavail(&tp->t_inpcb->inp_socket->so_snd) &&
            (tmr_up == PACE_TMR_RXT)) {
            /*
             * If we hit ENOBUFS then we would expect the possibility
             * of nothing outstanding and the RXT up (and the hptsi
             * timer).
*/ return; } else if (((tcp_always_keepalive || rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) && (tmr_up == PACE_TMR_KEEP) && (tp->snd_max == tp->snd_una)) { /* We should have keep alive up and we do */ return; } } if (rsm && (rsm->r_flags & RACK_SACK_PASSED)) { if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) == 1) && (rsm->r_flags & RACK_HAS_FIN)) { /* needs to be a RXT */ if (tmr_up == PACE_TMR_RXT) return; } else if (tmr_up == PACE_TMR_RACK) return; } else if (SEQ_GT(tp->snd_max,tp->snd_una) && ((tmr_up == PACE_TMR_TLP) || (tmr_up == PACE_TMR_RXT))) { /* * Either a TLP or RXT is fine if no sack-passed * is in place and data is outstanding. */ return; } else if (tmr_up == PACE_TMR_DELACK) { /* * If the delayed ack was going to go off * before the rtx/tlp/rack timer were going to * expire, then that would be the timer in control. * Note we don't check the time here trusting the * code is correct. */ return; } /* * Ok the timer originally started is not what we want now. * We will force the hpts to be stopped if any, and restart * with the slot set to what was in the saved slot. */ rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0); } static void rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, int32_t nxt_pkt, struct timeval *tv) { int32_t thflags, retval, did_out = 0; int32_t way_out = 0; uint32_t cts; uint32_t tiwin; struct tcpopt to; struct tcp_rack *rack; struct rack_sendmap *rsm; int32_t prev_state = 0; cts = tcp_tv_to_mssectick(tv); rack = (struct tcp_rack *)tp->t_fb_ptr; kern_prefetch(rack, &prev_state); prev_state = 0; thflags = th->th_flags; /* * If this is either a state-changing packet or current state isn't * established, we require a read lock on tcbinfo. Otherwise, we * allow the tcbinfo to be in either locked or unlocked, as the * caller may have unnecessarily acquired a lock due to a race. */ INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex2 = rack->r_ctl.rc_num_maps_alloced; TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0, tlen, &log, true); } /* * Segment received on connection. Reset idle time and keep-alive * timer. XXX: This should be done after segment validation to * ignore broken/spoofed segs. */ if (tp->t_idle_reduce && (tp->snd_max == tp->snd_una)) { if ((ticks - tp->t_rcvtime) >= tp->t_rxtcur) { counter_u64_add(rack_input_idle_reduces, 1); rack_cc_after_idle(tp, (rack->r_idle_reduce_largest ? 1 :0)); } } rack->r_ctl.rc_rcvtime = cts; tp->t_rcvtime = ticks; /* * Unscale the window into a 32-bit value. For the SYN_SENT state * the scale is zero. */ tiwin = th->th_win << tp->snd_scale; #ifdef NETFLIX_STATS stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin); #endif /* * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move * this to occur after we've validated the segment. 
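 * The ECN block below clears our pending ECE echo when the peer signals
 * CWR, latches TF_ECN_SND_ECE on an IP-level CE mark, and treats an
 * incoming ECE as a congestion signal (CC_ECN).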
*/ if (tp->t_flags & TF_ECN_PERMIT) { if (thflags & TH_CWR) tp->t_flags &= ~TF_ECN_SND_ECE; switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->t_flags |= TF_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: TCPSTAT_INC(tcps_ecn_ect0); break; case IPTOS_ECN_ECT1: TCPSTAT_INC(tcps_ecn_ect1); break; } /* Congestion experienced. */ if (thflags & TH_ECE) { rack_cong_signal(tp, th, CC_ECN); } } /* * Parse options on any incoming segment. */ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); /* * If echoed timestamp is later than the current time, fall back to * non RFC1323 RTT calculation. Normalize timestamp if syncookies * were used when this connection was established. */ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { to.to_tsecr -= tp->ts_offset; if (TSTMP_GT(to.to_tsecr, cts)) to.to_tsecr = 0; } /* * If its the first time in we need to take care of options and * verify we can do SACK for rack! */ if (rack->r_state == 0) { /* Should be init'd by rack_init() */ KASSERT(rack->rc_inp != NULL, ("%s: rack->rc_inp unexpectedly NULL", __func__)); if (rack->rc_inp == NULL) { rack->rc_inp = tp->t_inpcb; } /* * Process options only when we get SYN/ACK back. The SYN * case for incoming connections is handled in tcp_syncache. * According to RFC1323 the window field in a SYN (i.e., a * or ) segment itself is never scaled. XXX * this is traditional behavior, may need to be cleaned up. */ rack->r_cpu = inp_to_cpuid(tp->t_inpcb); if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; } /* * Initial send window. It will be updated with the * next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = cts; } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; } /* * At this point we are at the initial call. Here we decide * if we are doing RACK or not. We do this by seeing if * TF_SACK_PERMIT is set, if not rack is *not* possible and * we switch to the default code. */ if ((tp->t_flags & TF_SACK_PERMIT) == 0) { tcp_switch_back_to_default(tp); (*tp->t_fb->tfb_tcp_do_segment) (m, th, so, tp, drop_hdrlen, tlen, iptos); return; } /* Set the flag */ rack->r_is_v6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; tcp_set_hpts(tp->t_inpcb); sack_filter_clear(&rack->r_ctl.rack_sf, th->th_ack); } /* * This is the one exception case where we set the rack state * always. All other times (timers etc) we must have a rack-state * set (so we assure we have done the checks above for SACK). */ if (rack->r_state != tp->t_state) rack_set_state(tp, rack); if (SEQ_GT(th->th_ack, tp->snd_una) && (rsm = TAILQ_FIRST(&rack->r_ctl.rc_map)) != NULL) kern_prefetch(rsm, &prev_state); prev_state = rack->r_state; rack->r_ctl.rc_tlp_send_cnt = 0; rack_clear_rate_sample(rack); retval = (*rack->r_substate) (m, th, so, tp, &to, drop_hdrlen, tlen, tiwin, thflags, nxt_pkt); #ifdef INVARIANTS if ((retval == 0) && (tp->t_inpcb == NULL)) { panic("retval:%d tp:%p t_inpcb:NULL state:%d", retval, tp, prev_state); } #endif if (retval == 0) { /* * If retval is 1 the tcb is unlocked and most likely the tp * is gone. 
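 * With retval == 0 the TCB is still locked, so we can commit the RTT
 * samples, run output and the hpts timer as needed, and release the
 * lock ourselves at the end of this block.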
*/ INP_WLOCK_ASSERT(tp->t_inpcb); tcp_rack_xmit_timer_commit(rack, tp); if (nxt_pkt == 0) { if (rack->r_wanted_output != 0) { did_out = 1; (void)tp->t_fb->tfb_tcp_output(tp); } rack_start_hpts_timer(rack, tp, cts, __LINE__, 0, 0, 0); } if (((rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) == 0) && (SEQ_GT(tp->snd_max, tp->snd_una) || (tp->t_flags & TF_DELACK) || ((tcp_always_keepalive || rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)))) { /* We could not send (probably in the hpts but stopped the timer earlier)? */ if ((tp->snd_max == tp->snd_una) && ((tp->t_flags & TF_DELACK) == 0) && (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) { /* keep alive not needed if we are hptsi output yet */ ; } else { if (rack->rc_inp->inp_in_hpts) tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0); } way_out = 1; } else { /* Do we have the correct timer running? */ rack_timer_audit(tp, rack, &so->so_snd); way_out = 2; } rack_log_doseg_done(rack, cts, nxt_pkt, did_out, way_out); if (did_out) rack->r_wanted_output = 0; #ifdef INVARIANTS if (tp->t_inpcb == NULL) { panic("OP:%d retval:%d tp:%p t_inpcb:NULL state:%d", did_out, retval, tp, prev_state); } #endif INP_WUNLOCK(tp->t_inpcb); } } void rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos) { struct timeval tv; #ifdef RSS struct tcp_function_block *tfb; struct tcp_rack *rack; struct inpcb *inp; rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->r_state == 0) { /* * Initial input (ACK to SYN-ACK etc)lets go ahead and get * it processed */ tcp_get_usecs(&tv); rack_hpts_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, 0, &tv); return; } tcp_queue_to_input(tp, m, th, tlen, drop_hdrlen, iptos); INP_WUNLOCK(tp->t_inpcb); #else tcp_get_usecs(&tv); rack_hpts_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, 0, &tv); #endif } struct rack_sendmap * tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused) { struct rack_sendmap *rsm = NULL; int32_t idx; uint32_t srtt_cur, srtt = 0, thresh = 0, ts_low = 0; /* Return the next guy to be re-transmitted */ if (TAILQ_EMPTY(&rack->r_ctl.rc_map)) { return (NULL); } if (tp->t_flags & TF_SENTFIN) { /* retran the end FIN? 
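 * Once a FIN has been sent we do not pick a retransmission candidate
 * from here.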
*/ return (NULL); } /* ok lets look at this one */ rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); if (rsm && ((rsm->r_flags & RACK_ACKED) == 0)) { goto check_it; } rsm = rack_find_lowest_rsm(rack); if (rsm == NULL) { return (NULL); } check_it: srtt_cur = tp->t_srtt >> TCP_RTT_SHIFT; srtt = TICKS_2_MSEC(srtt_cur); if (rack->rc_rack_rtt && (srtt > rack->rc_rack_rtt)) srtt = rack->rc_rack_rtt; if (rsm->r_flags & RACK_ACKED) { return (NULL); } if ((rsm->r_flags & RACK_SACK_PASSED) == 0) { /* Its not yet ready */ return (NULL); } idx = rsm->r_rtr_cnt - 1; ts_low = rsm->r_tim_lastsent[idx]; thresh = rack_calc_thresh_rack(rack, srtt, tsused); if (tsused <= ts_low) { return (NULL); } if ((tsused - ts_low) >= thresh) { return (rsm); } return (NULL); } static int rack_output(struct tcpcb *tp) { struct socket *so; uint32_t recwin, sendwin; uint32_t sb_offset; int32_t len, flags, error = 0; struct mbuf *m; struct mbuf *mb; uint32_t if_hw_tsomaxsegcount = 0; uint32_t if_hw_tsomaxsegsize; long tot_len_this_send = 0; struct ip *ip = NULL; #ifdef TCPDEBUG struct ipovly *ipov = NULL; #endif #ifdef NETFLIX_TCP_O_UDP struct udphdr *udp = NULL; #endif struct tcp_rack *rack; struct tcphdr *th; uint8_t pass = 0; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; #ifdef NETFLIX_TCP_O_UDP unsigned ulen; #endif uint32_t rack_seq; #if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; #endif int32_t idle, sendalot; int32_t sub_from_prr = 0; volatile int32_t sack_rxmit; struct rack_sendmap *rsm = NULL; int32_t tso, mtu, would_have_fin = 0; struct tcpopt to; int32_t slot = 0; uint32_t cts; uint8_t hpts_calling, doing_tlp = 0; int32_t do_a_prefetch; int32_t prefetch_rsm = 0; int32_t prefetch_so_done = 0; struct tcp_log_buffer *lgb = NULL; struct inpcb *inp; struct sockbuf *sb; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int32_t isipv6; #endif #ifdef KERN_TLS const bool hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0; #else const bool hw_tls = false; #endif /* setup and take the cache hits here */ rack = (struct tcp_rack *)tp->t_fb_ptr; inp = rack->rc_inp; so = inp->inp_socket; sb = &so->so_snd; kern_prefetch(sb, &do_a_prefetch); do_a_prefetch = 1; INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) return (tcp_offload_output(tp)); #endif #ifdef TCP_RFC7413 /* * For TFO connections in SYN_RECEIVED, only allow the initial * SYN|ACK and those sent by the retransmit timer. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED) && SEQ_GT(tp->snd_max, tp->snd_una) && /* inital SYN|ACK sent */ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ return (0); #endif #ifdef INET6 if (rack->r_state) { /* Use the cache line loaded if possible */ isipv6 = rack->r_is_v6; } else { isipv6 = (inp->inp_vflag & INP_IPV6) != 0; } #endif cts = tcp_ts_getticks(); if (((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) && inp->inp_in_hpts) { /* * We are on the hpts for some timer but not hptsi output. * Remove from the hpts unconditionally. */ rack_timer_cancel(tp, rack, cts, __LINE__); } /* Mark that we have called rack_output(). */ if ((rack->r_timer_override) || (tp->t_flags & TF_FORCEDATA) || (tp->t_state < TCPS_ESTABLISHED)) { if (tp->t_inpcb->inp_in_hpts) tcp_hpts_remove(tp->t_inpcb, HPTS_REMOVE_OUTPUT); } else if (tp->t_inpcb->inp_in_hpts) { /* * On the hpts you can't pass even if ACKNOW is on, we will * when the hpts fires. 
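 * Count this as an in-pace return and bail out; output will be retried
 * when the hpts fires.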
*/ counter_u64_add(rack_out_size[TCP_MSS_ACCT_INPACE], 1); return (0); } hpts_calling = inp->inp_hpts_calls; inp->inp_hpts_calls = 0; if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) { if (rack_process_timers(tp, rack, cts, hpts_calling)) { counter_u64_add(rack_out_size[TCP_MSS_ACCT_ATIMER], 1); return (0); } } rack->r_wanted_output = 0; rack->r_timer_override = 0; /* * Determine length of data that should be transmitted, and flags * that will be used. If there is some data or critical controls * (SYN, RST) to send, then transmit; otherwise, investigate * further. */ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); if (tp->t_idle_reduce) { if (idle && ((ticks - tp->t_rcvtime) >= tp->t_rxtcur)) rack_cc_after_idle(tp, (rack->r_idle_reduce_largest ? 1 :0)); } tp->t_flags &= ~TF_LASTIDLE; if (idle) { if (tp->t_flags & TF_MORETOCOME) { tp->t_flags |= TF_LASTIDLE; idle = 0; } } again: /* * If we've recently taken a timeout, snd_max will be greater than * snd_nxt. There may be SACK information that allows us to avoid * resending already delivered data. Adjust snd_nxt accordingly. */ sendalot = 0; cts = tcp_ts_getticks(); tso = 0; mtu = 0; sb_offset = tp->snd_max - tp->snd_una; sendwin = min(tp->snd_wnd, tp->snd_cwnd); flags = tcp_outflags[tp->t_state]; /* * Send any SACK-generated retransmissions. If we're explicitly * trying to send out new data (when sendalot is 1), bypass this * function. If we retransmit in fast recovery mode, decrement * snd_cwnd, since we're replacing a (future) new transmission with * a retransmission now, and we previously incremented snd_cwnd in * tcp_input(). */ /* * Still in sack recovery , reset rxmit flag to zero. */ while (rack->rc_free_cnt < rack_free_cache) { rsm = rack_alloc(rack); if (rsm == NULL) { if (inp->inp_hpts_calls) /* Retry in a ms */ slot = 1; goto just_return_nolock; } TAILQ_INSERT_TAIL(&rack->r_ctl.rc_free, rsm, r_next); rack->rc_free_cnt++; rsm = NULL; } if (inp->inp_hpts_calls) inp->inp_hpts_calls = 0; sack_rxmit = 0; len = 0; rsm = NULL; if (flags & TH_RST) { SOCKBUF_LOCK(sb); goto send; } if (rack->r_ctl.rc_tlpsend) { /* Tail loss probe */ long cwin; long tlen; doing_tlp = 1; rsm = rack->r_ctl.rc_tlpsend; rack->r_ctl.rc_tlpsend = NULL; sack_rxmit = 1; tlen = rsm->r_end - rsm->r_start; if (tlen > tp->t_maxseg) tlen = tp->t_maxseg; #ifdef INVARIANTS if (SEQ_GT(tp->snd_una, rsm->r_start)) { panic("tp:%p rack:%p snd_una:%u rsm:%p r_start:%u", tp, rack, tp->snd_una, rsm, rsm->r_start); } #endif sb_offset = rsm->r_start - tp->snd_una; cwin = min(tp->snd_wnd, tlen); len = cwin; } else if (rack->r_ctl.rc_resend) { /* Retransmit timer */ rsm = rack->r_ctl.rc_resend; rack->r_ctl.rc_resend = NULL; len = rsm->r_end - rsm->r_start; sack_rxmit = 1; sendalot = 0; sb_offset = rsm->r_start - tp->snd_una; if (len >= tp->t_maxseg) { len = tp->t_maxseg; } KASSERT(sb_offset >= 0, ("%s: sack block to the left of una : %d", __func__, sb_offset)); } else if ((rack->rc_in_persist == 0) && ((rsm = tcp_rack_output(tp, rack, cts)) != NULL)) { long tlen; if ((!IN_RECOVERY(tp->t_flags)) && ((tp->t_flags & (TF_WASFRECOVERY | TF_WASCRECOVERY)) == 0)) { /* Enter recovery if not induced by a time-out */ rack->r_ctl.rc_rsm_start = rsm->r_start; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; rack_cong_signal(tp, NULL, CC_NDUPACK); /* * When we enter recovery we need to assure we send * one packet. 
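 * Seeding rc_prr_sndcnt with one MSS guarantees PRR lets at least one
 * segment out as recovery begins.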
*/ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } #ifdef INVARIANTS if (SEQ_LT(rsm->r_start, tp->snd_una)) { panic("Huh, tp:%p rack:%p rsm:%p start:%u < snd_una:%u\n", tp, rack, rsm, rsm->r_start, tp->snd_una); } #endif tlen = rsm->r_end - rsm->r_start; sb_offset = rsm->r_start - tp->snd_una; if (tlen > rack->r_ctl.rc_prr_sndcnt) { len = rack->r_ctl.rc_prr_sndcnt; } else { len = tlen; } if (len >= tp->t_maxseg) { sendalot = 1; len = tp->t_maxseg; } else { sendalot = 0; if ((rack->rc_timer_up == 0) && (len < tlen)) { /* * If its not a timer don't send a partial * segment. */ len = 0; goto just_return_nolock; } } KASSERT(sb_offset >= 0, ("%s: sack block to the left of una : %d", __func__, sb_offset)); if (len > 0) { sub_from_prr = 1; sack_rxmit = 1; TCPSTAT_INC(tcps_sack_rexmits); TCPSTAT_ADD(tcps_sack_rexmit_bytes, min(len, tp->t_maxseg)); counter_u64_add(rack_rtm_prr_retran, 1); } } if (rsm && (rsm->r_flags & RACK_HAS_FIN)) { /* we are retransmitting the fin */ len--; if (len) { /* * When retransmitting data do *not* include the * FIN. This could happen from a TLP probe. */ flags &= ~TH_FIN; } } #ifdef INVARIANTS /* For debugging */ rack->r_ctl.rc_rsm_at_retran = rsm; #endif /* * Enforce a connection sendmap count limit if set * as long as we are not retransmiting. */ if ((rsm == NULL) && (rack_map_entries_limit > 0) && (rack->r_ctl.rc_num_maps_alloced >= rack_map_entries_limit)) { counter_u64_add(rack_to_alloc_limited, 1); if (!rack->alloc_limit_reported) { rack->alloc_limit_reported = 1; counter_u64_add(rack_alloc_limited_conns, 1); } goto just_return_nolock; } /* * Get standard flags, and add SYN or FIN if requested by 'hidden' * state flags. */ if (tp->t_flags & TF_NEEDFIN) flags |= TH_FIN; if (tp->t_flags & TF_NEEDSYN) flags |= TH_SYN; if ((sack_rxmit == 0) && (prefetch_rsm == 0)) { void *end_rsm; end_rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_tmap, rack_sendmap, r_tnext); if (end_rsm) kern_prefetch(end_rsm, &prefetch_rsm); prefetch_rsm = 1; } SOCKBUF_LOCK(sb); /* * If in persist timeout with window of 0, send 1 byte. Otherwise, * if window is small but nonzero and time TF_SENTFIN expired, we * will send what we can and go to transmit state. */ if (tp->t_flags & TF_FORCEDATA) { if (sendwin == 0) { /* * If we still have some data to send, then clear * the FIN bit. Usually this would happen below * when it realizes that we aren't sending all the * data. However, if we have exactly 1 byte of * unsent data, then it won't clear the FIN bit * below, and if we are in persist state, we wind up * sending the packet without recording that we sent * the FIN bit. * * We can't just blindly clear the FIN bit, because * if we don't have any more data to send then the * probe will be the FIN itself. */ if (sb_offset < sbused(sb)) flags &= ~TH_FIN; sendwin = 1; } else { if (rack->rc_in_persist) rack_exit_persist(tp, rack); /* * If we are dropping persist mode then we need to * correct snd_nxt/snd_max and off. */ tp->snd_nxt = tp->snd_max; sb_offset = tp->snd_nxt - tp->snd_una; } } /* * If snd_nxt == snd_max and we have transmitted a FIN, the * sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a * negative length. This can also occur when TCP opens up its * congestion window while receiving additional duplicate acks after * fast-retransmit because TCP will reset snd_nxt to snd_max after * the fast-retransmit. * * In the normal retransmit-FIN-only case, however, snd_nxt will be * set to snd_una, the sb_offset will be 0, and the length may wind * up 0. 
* * If sack_rxmit is true we are retransmitting from the scoreboard * in which case len is already set. */ if (sack_rxmit == 0) { uint32_t avail; avail = sbavail(sb); if (SEQ_GT(tp->snd_nxt, tp->snd_una)) sb_offset = tp->snd_nxt - tp->snd_una; else sb_offset = 0; if (IN_RECOVERY(tp->t_flags) == 0) { if (rack->r_ctl.rc_tlp_new_data) { /* TLP is forcing out new data */ if (rack->r_ctl.rc_tlp_new_data > (uint32_t) (avail - sb_offset)) { rack->r_ctl.rc_tlp_new_data = (uint32_t) (avail - sb_offset); } if (rack->r_ctl.rc_tlp_new_data > tp->snd_wnd) len = tp->snd_wnd; else len = rack->r_ctl.rc_tlp_new_data; rack->r_ctl.rc_tlp_new_data = 0; doing_tlp = 1; } else { if (sendwin > avail) { /* use the available */ if (avail > sb_offset) { len = (int32_t)(avail - sb_offset); } else { len = 0; } } else { if (sendwin > sb_offset) { len = (int32_t)(sendwin - sb_offset); } else { len = 0; } } } } else { uint32_t outstanding; /* * We are inside of a SACK recovery episode and are * sending new data, having retransmitted all the * data possible so far in the scoreboard. */ outstanding = tp->snd_max - tp->snd_una; if ((rack->r_ctl.rc_prr_sndcnt + outstanding) > tp->snd_wnd) { if (tp->snd_wnd > outstanding) { len = tp->snd_wnd - outstanding; /* Check to see if we have the data */ if (((sb_offset + len) > avail) && (avail > sb_offset)) len = avail - sb_offset; else len = 0; } else len = 0; } else if (avail > sb_offset) len = avail - sb_offset; else len = 0; if (len > 0) { if (len > rack->r_ctl.rc_prr_sndcnt) len = rack->r_ctl.rc_prr_sndcnt; if (len > 0) { sub_from_prr = 1; counter_u64_add(rack_rtm_prr_newdata, 1); } } if (len > tp->t_maxseg) { /* * We should never send more than a MSS when * retransmitting or sending new data in prr * mode unless the override flag is on. Most * likely the PRR algorithm is not going to * let us send a lot as well :-) */ if (rack->r_ctl.rc_prr_sendalot == 0) len = tp->t_maxseg; } else if (len < tp->t_maxseg) { /* * Do we send any? The idea here is if the * send empty's the socket buffer we want to * do it. However if not then lets just wait * for our prr_sndcnt to get bigger. */ long leftinsb; leftinsb = sbavail(sb) - sb_offset; if (leftinsb > len) { /* This send does not empty the sb */ len = 0; } } } } if (prefetch_so_done == 0) { kern_prefetch(so, &prefetch_so_done); prefetch_so_done = 1; } /* * Lop off SYN bit if it has already been sent. However, if this is * SYN-SENT state and if segment contains data and if we don't know * that foreign host supports TAO, suppress sending segment. */ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { if ((tp->t_state != TCPS_SYN_RECEIVED) && (tp->t_state != TCPS_SYN_SENT)) flags &= ~TH_SYN; #ifdef TCP_RFC7413 /* * When sending additional segments following a TFO SYN|ACK, * do not include the SYN bit. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED)) flags &= ~TH_SYN; #endif } /* * Be careful not to send data and/or FIN on SYN segments. This * measure is needed to prevent interoperability problems with not * fully conformant TCP implementations. */ if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) { len = 0; flags &= ~TH_FIN; } #ifdef TCP_RFC7413 /* * When retransmitting SYN|ACK on a passively-created TFO socket, * don't include data, as the presence of data may have caused the * original SYN|ACK to have been dropped by a middlebox. 
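 * (The t_rxtshift > 0 test below is what identifies a retransmitted
 * SYN|ACK.)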
*/ if ((tp->t_flags & TF_FASTOPEN) && ((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0))) len = 0; #endif if (len <= 0) { /* * If FIN has been sent but not acked, but we haven't been * called to retransmit, len will be < 0. Otherwise, window * shrank after we sent into it. If window shrank to 0, * cancel pending retransmit, pull snd_nxt back to (closed) * window, and set the persist timer if it isn't already * going. If the window didn't close completely, just wait * for an ACK. * * We also do a general check here to ensure that we will * set the persist timer when we have data to send, but a * 0-byte window. This makes sure the persist timer is set * even if the packet hits one of the "goto send" lines * below. */ len = 0; if ((tp->snd_wnd == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) && (sb_offset < (int)sbavail(sb))) { tp->snd_nxt = tp->snd_una; rack_enter_persist(tp, rack, cts); } } /* len will be >= 0 after this point. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); tcp_sndbuf_autoscale(tp, so, sendwin); /* * Decide if we can use TCP Segmentation Offloading (if supported by * hardware). * * TSO may only be used if we are in a pure bulk sending state. The * presence of TCP-MD5, SACK retransmits, SACK advertizements and IP * options prevent using TSO. With TSO the TCP header is the same * (except for the sequence number) for all generated packets. This * makes it impossible to transmit any options which vary per * generated segment or packet. * * IPv4 handling has a clear separation of ip options and ip header * flags while IPv6 combines both in in6p_outputopts. ip6_optlen() does * the right thing below to provide length of just ip options and thus * checking for ipoptlen is enough to decide if ip options are present. */ #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); else #endif if (tp->t_inpcb->inp_options) ipoptlen = tp->t_inpcb->inp_options->m_len - offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Pre-calculate here as we save another lookup into the darknesses * of IPsec that way and can actually decide if TSO is ok. */ #ifdef INET6 if (isipv6 && IPSEC_ENABLED(ipv6)) ipsec_optlen = IPSEC_HDRSIZE(ipv6, tp->t_inpcb); #ifdef INET else #endif #endif /* INET6 */ #ifdef INET if (IPSEC_ENABLED(ipv4)) ipsec_optlen = IPSEC_HDRSIZE(ipv4, tp->t_inpcb); #endif /* INET */ #endif #if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && #ifdef NETFLIX_TCP_O_UDP (tp->t_port == 0) && #endif ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && ipoptlen == 0) tso = 1; { uint32_t outstanding; outstanding = tp->snd_max - tp->snd_una; if (tp->t_flags & TF_SENTFIN) { /* * If we sent a fin, snd_max is 1 higher than * snd_una */ outstanding--; } if (outstanding > 0) { /* * This is sub-optimal. We only send a stand alone * FIN on its own segment. */ if (flags & TH_FIN) { flags &= ~TH_FIN; would_have_fin = 1; } } else if (sack_rxmit) { if ((rsm->r_flags & RACK_HAS_FIN) == 0) flags &= ~TH_FIN; } else { if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + sbused(sb))) flags &= ~TH_FIN; } } recwin = sbspace(&so->so_rcv); /* * Sender silly window avoidance. We transmit under the following * conditions when len is non-zero: * * - We have a full segment (or more with TSO) - This is the last * buffer in a write()/send() and we are either idle or running * NODELAY - we've timed out (e.g. 
persist timer) - we have more * then 1/2 the maximum send window's worth of data (receiver may be * limited the window size) - we need to retransmit */ if (len) { if (len >= tp->t_maxseg) { pass = 1; goto send; } /* * NOTE! on localhost connections an 'ack' from the remote * end may occur synchronously with the output and cause us * to flush a buffer queued with moretocome. XXX * */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && ((uint32_t)len + (uint32_t)sb_offset >= sbavail(&so->so_snd)) && (tp->t_flags & TF_NOPUSH) == 0) { pass = 2; goto send; } if (tp->t_flags & TF_FORCEDATA) { /* typ. timeout case */ pass = 3; goto send; } if ((tp->snd_una == tp->snd_max) && len) { /* Nothing outstanding */ goto send; } if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) { pass = 4; goto send; } if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { /* retransmit case */ pass = 5; goto send; } if (sack_rxmit) { pass = 6; goto send; } } /* * Sending of standalone window updates. * * Window updates are important when we close our window due to a * full socket buffer and are opening it again after the application * reads data from it. Once the window has opened again and the * remote end starts to send again the ACK clock takes over and * provides the most current window information. * * We must avoid the silly window syndrome whereas every read from * the receive buffer, no matter how small, causes a window update * to be sent. We also should avoid sending a flurry of window * updates when the socket buffer had queued a lot of data and the * application is doing small reads. * * Prevent a flurry of pointless window updates by only sending an * update when we can increase the advertized window by more than * 1/4th of the socket buffer capacity. When the buffer is getting * full or is very small be more aggressive and send an update * whenever we can increase by two mss sized segments. In all other * situations the ACK's to new incoming data will carry further * window increases. * * Don't send an independent window update if a delayed ACK is * pending (it will get piggy-backed on it) or the remote side * already has done a half-close and won't send more data. Skip * this if the connection is in T/TCP half-open state. */ if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) && !(tp->t_flags & TF_DELACK) && !TCPS_HAVERCVDFIN(tp->t_state)) { /* * "adv" is the amount we could increase the window, taking * into account that we are limited by TCP_MAXWIN << * tp->rcv_scale. */ int32_t adv; int oldwin; adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale); if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { oldwin = (tp->rcv_adv - tp->rcv_nxt); adv -= oldwin; } else oldwin = 0; /* * If the new window size ends up being the same as the old * size when it is scaled, then don't force a window update. */ if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale) goto dontupdate; if (adv >= (int32_t)(2 * tp->t_maxseg) && (adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) || recwin <= (int32_t)(so->so_rcv.sb_hiwat / 8) || so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg)) { pass = 7; goto send; } if (2 * adv >= (int32_t) so->so_rcv.sb_hiwat) goto send; } dontupdate: /* * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW * is also a catch-all for the retransmit timer timeout case. 
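 * Each branch below records the reason for transmitting in 'pass'
 * before jumping to send.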
*/ if (tp->t_flags & TF_ACKNOW) { pass = 8; goto send; } if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) { pass = 9; goto send; } if (SEQ_GT(tp->snd_up, tp->snd_una)) { pass = 10; goto send; } /* * If our state indicates that FIN should be sent and we have not * yet done so, then we need to send. */ if (flags & TH_FIN) { if ((tp->t_flags & TF_SENTFIN) || (((tp->t_flags & TF_SENTFIN) == 0) && (tp->snd_nxt == tp->snd_una))) { pass = 11; goto send; } } /* * No reason to send a segment, just return. */ just_return: SOCKBUF_UNLOCK(sb); just_return_nolock: if (tot_len_this_send == 0) counter_u64_add(rack_out_size[TCP_MSS_ACCT_JUSTRET], 1); rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, tot_len_this_send, 1); rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling); tp->t_flags &= ~TF_FORCEDATA; return (0); send: if (doing_tlp == 0) { /* * Data not a TLP, and its not the rxt firing. If it is the * rxt firing, we want to leave the tlp_in_progress flag on * so we don't send another TLP. It has to be a rack timer * or normal send (response to acked data) to clear the tlp * in progress flag. */ rack->rc_tlp_in_progress = 0; } SOCKBUF_LOCK_ASSERT(sb); if (len > 0) { if (len >= tp->t_maxseg) tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT; else tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT; } /* * Before ESTABLISHED, force sending of initial options unless TCP * set not to do any options. NOTE: we assume that the IP/TCP header * plus TCP options always fit in a single mbuf, leaving room for a * maximum link header, i.e. max_linkhdr + sizeof (struct tcpiphdr) * + optlen <= MCLBYTES */ optlen = 0; #ifdef INET6 if (isipv6) hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else #endif hdrlen = sizeof(struct tcpiphdr); /* * Compute options for segment. We only have to care about SYN and * established connection segments. Options for SYN-ACK segments * are handled in TCP syncache. */ to.to_flags = 0; if ((tp->t_flags & TF_NOOPT) == 0) { /* Maximum segment size. */ if (flags & TH_SYN) { tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&inp->inp_inc); #ifdef NETFLIX_TCP_O_UDP if (tp->t_port) to.to_mss -= V_tcp_udp_tunneling_overhead; #endif to.to_flags |= TOF_MSS; #ifdef TCP_RFC7413 /* * Only include the TFO option on the first * transmission of the SYN|ACK on a * passively-created TFO socket, as the presence of * the TFO option may have caused the original * SYN|ACK to have been dropped by a middlebox. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift == 0)) { to.to_tfo_len = TCP_FASTOPEN_MAX_COOKIE_LEN; to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie; to.to_flags |= TOF_FASTOPEN; } #endif } /* Window scaling. */ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { to.to_wscale = tp->request_r_scale; to.to_flags |= TOF_SCALE; } /* Timestamps. */ if ((tp->t_flags & TF_RCVD_TSTMP) || ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { to.to_tsval = cts + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } /* Set receive buffer autosizing timestamp. */ if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE)) tp->rfbuf_ts = tcp_ts_getticks(); /* Selective ACK's. */ if (flags & TH_SYN) to.to_flags |= TOF_SACKPERM; else if (TCPS_HAVEESTABLISHED(tp->t_state) && tp->rcv_numsacks > 0) { to.to_flags |= TOF_SACK; to.to_nsacks = tp->rcv_numsacks; to.to_sacks = (u_char *)tp->sackblks; } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). 
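 * We only request the signature option here; the digest itself is
 * computed later, once the headers have been laid out in the mbuf.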
*/ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ /* Processing the options. */ hdrlen += optlen = tcp_addoptions(&to, opt); } #ifdef NETFLIX_TCP_O_UDP if (tp->t_port) { if (V_tcp_udp_tunneling_port == 0) { /* The port was removed?? */ SOCKBUF_UNLOCK(&so->so_snd); return (EHOSTUNREACH); } hdrlen += sizeof(struct udphdr); } #endif - ipoptlen = 0; +#ifdef INET6 + if (isipv6) + ipoptlen = ip6_optlen(tp->t_inpcb); + else +#endif + if (tp->t_inpcb->inp_options) + ipoptlen = tp->t_inpcb->inp_options->m_len - + offsetof(struct ipoption, ipopt_list); + else + ipoptlen = 0; #if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif /* * Adjust data length if insertion of options will bump the packet * length beyond the t_maxseg length. Clear the FIN bit because we * cut off the tail of the segment. */ if (len + optlen + ipoptlen > tp->t_maxseg) { if (flags & TH_FIN) { would_have_fin = 1; flags &= ~TH_FIN; } if (tso) { uint32_t if_hw_tsomax; uint32_t moff; int32_t max_len; /* extract TSO information */ if_hw_tsomax = tp->t_tsomax; if_hw_tsomaxsegcount = tp->t_tsomaxsegcount; if_hw_tsomaxsegsize = tp->t_tsomaxsegsize; KASSERT(ipoptlen == 0, ("%s: TSO can't do IP options", __func__)); /* * Check if we should limit by maximum payload * length: */ if (if_hw_tsomax != 0) { /* compute maximum TSO length */ max_len = (if_hw_tsomax - hdrlen - max_linkhdr); if (max_len <= 0) { len = 0; } else if (len > max_len) { sendalot = 1; len = max_len; } } /* * Prevent the last segment from being fractional * unless the send sockbuf can be emptied: */ max_len = (tp->t_maxseg - optlen); if ((sb_offset + len) < sbavail(sb)) { moff = len % (u_int)max_len; if (moff != 0) { len -= moff; sendalot = 1; } } /* * In case there are too many small fragments don't * use TSO: */ if (len <= max_len) { len = max_len; sendalot = 1; tso = 0; } /* * Send the FIN in a separate segment after the bulk * sending is done. We don't trust the TSO * implementations to clear the FIN flag on all but * the last segment. */ if (tp->t_flags & TF_NEEDFIN) sendalot = 1; } else { + if (optlen + ipoptlen > tp->t_maxseg) { + /* + * Since we don't have enough space to put + * the IP header chain and the TCP header in + * one packet as required by RFC 7112, don't + * send it. + */ + SOCKBUF_UNLOCK(&so->so_snd); + error = EMSGSIZE; + sack_rxmit = 0; + goto out; + } len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; } } else tso = 0; KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET, ("%s: len > IP_MAXPACKET", __func__)); #ifdef DIAGNOSTIC #ifdef INET6 if (max_linkhdr + hdrlen > MCLBYTES) #else if (max_linkhdr + hdrlen > MHLEN) #endif panic("tcphdr too big"); #endif /* * This KASSERT is here to catch edge cases at a well defined place. * Before, those had triggered (random) panic conditions further * down. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); if ((len == 0) && (flags & TH_FIN) && (sbused(sb))) { /* * We have outstanding data, don't send a fin by itself!. */ goto just_return; } /* * Grab a header mbuf, attaching a copy of data to be transmitted, * and initialize the header from the template for sends on this * connection. */ if (len) { uint32_t max_val; uint32_t moff; if (rack->rc_pace_max_segs) max_val = rack->rc_pace_max_segs * tp->t_maxseg; else max_val = len; /* * We allow a limit on sending with hptsi. 
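 * That is, clamp a single pass to rc_pace_max_segs * t_maxseg bytes
 * when a pacing segment limit is configured.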
*/ if (len > max_val) { len = max_val; } #ifdef INET6 if (MHLEN < hdrlen + max_linkhdr) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else #endif m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { SOCKBUF_UNLOCK(sb); error = ENOBUFS; sack_rxmit = 0; goto out; } m->m_data += max_linkhdr; m->m_len = hdrlen; /* * Start the m_copy functions from the closest mbuf to the * sb_offset in the socket buffer chain. */ mb = sbsndptr_noadv(sb, sb_offset, &moff); if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) { m_copydata(mb, moff, (int)len, mtod(m, caddr_t)+hdrlen); if (SEQ_LT(tp->snd_nxt, tp->snd_max)) sbsndptr_adv(sb, mb, len); m->m_len += len; } else { struct sockbuf *msb; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) msb = NULL; else msb = sb; m->m_next = tcp_m_copym(/*tp, */ mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, hw_tls /*, NULL */); if (len <= (tp->t_maxseg - optlen)) { /* * Must have ran out of mbufs for the copy * shorten it to no longer need tso. Lets * not put on sendalot since we are low on * mbufs. */ tso = 0; } if (m->m_next == NULL) { SOCKBUF_UNLOCK(sb); (void)m_free(m); error = ENOBUFS; sack_rxmit = 0; goto out; } } if ((tp->t_flags & TF_FORCEDATA) && len == 1) { TCPSTAT_INC(tcps_sndprobe); #ifdef NETFLIX_STATS if (SEQ_LT(tp->snd_nxt, tp->snd_max)) stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB, len); else stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB, len); #endif } else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { if (rsm && (rsm->r_flags & RACK_TLP)) { /* * TLP should not count in retran count, but * in its own bin */ /* tp->t_sndtlppack++;*/ /* tp->t_sndtlpbyte += len;*/ counter_u64_add(rack_tlp_retran, 1); counter_u64_add(rack_tlp_retran_bytes, len); } else { tp->t_sndrexmitpack++; TCPSTAT_INC(tcps_sndrexmitpack); TCPSTAT_ADD(tcps_sndrexmitbyte, len); } #ifdef NETFLIX_STATS stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB, len); #endif } else { TCPSTAT_INC(tcps_sndpack); TCPSTAT_ADD(tcps_sndbyte, len); #ifdef NETFLIX_STATS stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB, len); #endif } /* * If we're sending everything we've got, set PUSH. (This * will keep happy those implementations which only give * data to the user when a buffer fills or a PUSH comes in.) */ if (sb_offset + len == sbused(sb) && sbused(sb) && !(flags & TH_SYN)) flags |= TH_PUSH; /* * Are we doing hptsi, if so we must calculate the slot. We * only do hptsi in ESTABLISHED and with no RESET being * sent where we have data to send. */ if (((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_CLOSE_WAIT) || ((tp->t_state == TCPS_FIN_WAIT_1) && ((tp->t_flags & TF_SENTFIN) == 0) && ((flags & TH_FIN) == 0))) && ((flags & TH_RST) == 0) && (rack->rc_always_pace)) { /* * We use the most optimistic possible cwnd/srtt for * sending calculations. This will make our * calculation anticipate getting more through * quicker then possible. But thats ok we don't want * the peer to have a gap in data sending. 
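 * tr_perms below is roughly the transmit budget per millisecond
 * (cwnd / srtt, with srtt in ms) and slot is how many milliseconds this
 * send should occupy at that rate. For example, cwnd = 100000 bytes and
 * srtt = 10 ms give tr_perms = 10000, so a 30000 byte send yields a
 * 3 ms slot (before the rc_pace_reduce adjustment).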
*/ uint32_t srtt, cwnd, tr_perms = 0; if (rack->r_ctl.rc_rack_min_rtt) srtt = rack->r_ctl.rc_rack_min_rtt; else srtt = TICKS_2_MSEC((tp->t_srtt >> TCP_RTT_SHIFT)); if (rack->r_ctl.rc_rack_largest_cwnd) cwnd = rack->r_ctl.rc_rack_largest_cwnd; else cwnd = tp->snd_cwnd; tr_perms = cwnd / srtt; if (tr_perms == 0) { tr_perms = tp->t_maxseg; } tot_len_this_send += len; /* * Calculate how long this will take to drain, if * the calculation comes out to zero, thats ok we * will use send_a_lot to possibly spin around for * more increasing tot_len_this_send to the point * that its going to require a pace, or we hit the * cwnd. Which in that case we are just waiting for * a ACK. */ slot = tot_len_this_send / tr_perms; /* Now do we reduce the time so we don't run dry? */ if (slot && rack->rc_pace_reduce) { int32_t reduce; reduce = (slot / rack->rc_pace_reduce); if (reduce < slot) { slot -= reduce; } else slot = 0; } if (rack->r_enforce_min_pace && (slot == 0) && (tot_len_this_send >= (rack->r_min_pace_seg_thresh * tp->t_maxseg))) { /* We are enforcing a minimum pace time of 1ms */ slot = rack->r_enforce_min_pace; } } SOCKBUF_UNLOCK(sb); } else { SOCKBUF_UNLOCK(sb); if (tp->t_flags & TF_ACKNOW) TCPSTAT_INC(tcps_sndacks); else if (flags & (TH_SYN | TH_FIN | TH_RST)) TCPSTAT_INC(tcps_sndctrl); else if (SEQ_GT(tp->snd_up, tp->snd_una)) TCPSTAT_INC(tcps_sndurg); else TCPSTAT_INC(tcps_sndwinup); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; sack_rxmit = 0; goto out; } #ifdef INET6 if (isipv6 && (MHLEN < hdrlen + max_linkhdr) && MHLEN >= hdrlen) { M_ALIGN(m, hdrlen); } else #endif m->m_data += max_linkhdr; m->m_len = hdrlen; } SOCKBUF_UNLOCK_ASSERT(sb); m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); #ifdef NETFLIX_TCP_O_UDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); udp->uh_dport = tp->t_port; ulen = hdrlen + len - sizeof(struct ip6_hdr); udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); } else #endif th = (struct tcphdr *)(ip6 + 1); tcpip_fillheaders(inp, /*tp->t_port, */ ip6, th); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); #ifdef TCPDEBUG ipov = (struct ipovly *)ip; #endif #ifdef NETFLIX_TCP_O_UDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); udp->uh_dport = tp->t_port; ulen = hdrlen + len - sizeof(struct ip); udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); } else #endif th = (struct tcphdr *)(ip + 1); tcpip_fillheaders(inp,/*tp->t_port, */ ip, th); } /* * Fill in fields, remembering maximum advertised window for use in * delaying messages about window sizes. If resending a FIN, be sure * not to use a new sequence number. */ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && tp->snd_nxt == tp->snd_max) tp->snd_nxt--; /* * If we are starting a connection, send ECN setup SYN packet. If we * are on a retransmit, we may resend those bits a number of times * as per RFC 3168. */ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { if (tp->t_rxtshift >= 1) { if (tp->t_rxtshift <= V_tcp_ecn_maxretries) flags |= TH_ECE | TH_CWR; } else flags |= TH_ECE | TH_CWR; } if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags & TF_ECN_PERMIT)) { /* * If the peer has ECN, mark data packets with ECN capable * transmission (ECT). 
Ignore pure ack packets, * retransmissions and window probes. */ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && !((tp->t_flags & TF_FORCEDATA) && len == 1)) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); else #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); } /* * Reply with proper ECN notifications. */ if (tp->t_flags & TF_ECN_SND_CWR) { flags |= TH_CWR; tp->t_flags &= ~TF_ECN_SND_CWR; } if (tp->t_flags & TF_ECN_SND_ECE) flags |= TH_ECE; } /* * If we are doing retransmissions, then snd_nxt will not reflect * the first unsent octet. For ACK only packets, we do not want the * sequence number of the retransmitted packet, we want the sequence * number of the next unsent octet. So, if there is no data (and no * SYN or FIN), use snd_max instead of snd_nxt when filling in * ti_seq. But if we are in persist state, snd_max might reflect * one byte beyond the right edge of the window, so use snd_nxt in * that case, since we know we aren't doing a retransmission. * (retransmit and persist are mutually exclusive...) */ if (sack_rxmit == 0) { if (len || (flags & (TH_SYN | TH_FIN)) || rack->rc_in_persist) { th->th_seq = htonl(tp->snd_nxt); rack_seq = tp->snd_nxt; } else if (flags & TH_RST) { /* * For a Reset send the last cum ack in sequence * (this like any other choice may still generate a * challenge ack, if a ack-update packet is in * flight). */ th->th_seq = htonl(tp->snd_una); rack_seq = tp->snd_una; } else { th->th_seq = htonl(tp->snd_max); rack_seq = tp->snd_max; } } else { th->th_seq = htonl(rsm->r_start); rack_seq = rsm->r_start; } th->th_ack = htonl(tp->rcv_nxt); if (optlen) { bcopy(opt, th + 1, optlen); th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; } th->th_flags = flags; /* * Calculate receive window. Don't shrink window, but avoid silly * window syndrome. */ if (recwin < (long)(so->so_rcv.sb_hiwat / 4) && recwin < (long)tp->t_maxseg) recwin = 0; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) recwin = (long)(tp->rcv_adv - tp->rcv_nxt); if (recwin > (long)TCP_MAXWIN << tp->rcv_scale) recwin = (long)TCP_MAXWIN << tp->rcv_scale; /* * According to RFC1323 the window field in a SYN (i.e., a or * ) segment itself is never scaled. The case is * handled in syncache. */ if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(&so->so_rcv), TCP_MAXWIN))); else th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); /* * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0 * window. This may cause the remote transmitter to stall. This * flag tells soreceive() to disable delayed acknowledgements when * draining the buffer. This can occur if the receiver is * attempting to read more data than can be buffered prior to * transmitting on the connection. */ if (th->th_win == 0) { tp->t_sndzerowin++; tp->t_flags |= TF_RXWIN0SENT; } else tp->t_flags &= ~TF_RXWIN0SENT; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); th->th_flags |= TH_URG; } else /* * If no urgent pointer to send, then we pull the urgent * pointer to the left edge of the send window so that it * doesn't drift into the send window on sequence number * wraparound. */ tp->snd_up = tp->snd_una; /* drag it along */ #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (to.to_flags & TOF_SIGNATURE) { /* * Calculate MD5 signature and put it into the place * determined before. * NOTE: since TCP options buffer doesn't point into * mbuf's data, calculate offset and use it. 
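The window advertisement logic is compact but easy to misread: the silly-window check zeroes small offers, the window is never shrunk below what was already advertised, it is capped at the largest value the scale factor can express, and only non-SYN segments apply the scale shift. A standalone sketch of that arithmetic, with the scale factor and buffer figures passed in explicitly (hypothetical helper, not the kernel code; the SYN branch here clamps the offered window rather than recomputing it from socket-buffer space as the real code does):

#include <stdint.h>

#define TCP_MAXWIN_SK	65535U		/* assumed: mirrors TCP_MAXWIN */

/*
 * Sketch of the advertised-window computation: recwin is the space we
 * could offer, already_adv is (rcv_adv - rcv_nxt), i.e. what the peer
 * has already been promised.  Returns the 16-bit value for th_win.
 */
static uint16_t
advertised_window_sketch(int64_t recwin, uint32_t so_rcv_hiwat,
    uint32_t maxseg, int64_t already_adv, uint8_t rcv_scale, int is_syn)
{
	/* Silly window avoidance: don't advertise tiny windows. */
	if (recwin < (int64_t)(so_rcv_hiwat / 4) && recwin < (int64_t)maxseg)
		recwin = 0;
	/* Never shrink the window below what was already advertised. */
	if (already_adv > 0 && recwin < already_adv)
		recwin = already_adv;
	/* Cap at the largest value the scale factor can express. */
	if (recwin > (int64_t)TCP_MAXWIN_SK << rcv_scale)
		recwin = (int64_t)TCP_MAXWIN_SK << rcv_scale;
	/* The window field in a SYN is never scaled (RFC 1323 / RFC 7323). */
	if (is_syn)
		return ((uint16_t)(recwin > TCP_MAXWIN_SK ?
		    TCP_MAXWIN_SK : recwin));
	return ((uint16_t)(recwin >> rcv_scale));
}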
*/ if (!TCPMD5_ENABLED() || TCPMD5_OUTPUT(m, th, (u_char *)(th + 1) + (to.to_signature - opt)) != 0) { /* * Do not send segment if the calculation of MD5 * digest has failed. */ goto out; } } #endif /* * Put TCP length in extended header, and then checksum extended * header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ #ifdef INET6 if (isipv6) { /* * ip6_plen is not need to be filled now, and will be filled * in ip6_output. */ #ifdef NETFLIX_TCP_O_UDP if (tp->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0); th->th_sum = htons(0); UDPSTAT_INC(udps_opackets); } else { #endif m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, 0); #ifdef NETFLIX_TCP_O_UDP } #endif } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { #ifdef NETFLIX_TCP_O_UDP if (tp->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); th->th_sum = htons(0); UDPSTAT_INC(udps_opackets); } else { #endif m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); #ifdef NETFLIX_TCP_O_UDP } #endif /* IP version must be set here for ipv4/ipv6 checking later */ KASSERT(ip->ip_v == IPVERSION, ("%s: IP version incorrect: %d", __func__, ip->ip_v)); } #endif /* * Enable TSO and specify the size of the segments. The TCP pseudo * header checksum is always provided. XXX: Fixme: This is currently * not the case for IPv6. */ if (tso) { KASSERT(len > tp->t_maxseg - optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } -#if defined(IPSEC) || defined(IPSEC_SUPPORT) - KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), - ("%s: mbuf chain shorter than expected: %d + %u + %u - %u != %u", - __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); -#else - KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL), - ("%s: mbuf chain shorter than expected: %d + %u + %u != %u", - __func__, len, hdrlen, ipoptlen, m_length(m, NULL))); -#endif + KASSERT(len + hdrlen == m_length(m, NULL), + ("%s: mbuf chain different than expected: %d + %u != %u", + __func__, len, hdrlen, m_length(m, NULL))); #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */ hhook_run_tcp_est_out(tp, th, &to, len, tso); #endif #ifdef TCPDEBUG /* * Trace. */ if (so->so_options & SO_DEBUG) { u_short save = 0; #ifdef INET6 if (!isipv6) #endif { save = ipov->ih_len; ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + * (th->th_off << 2) */ ); } tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0); #ifdef INET6 if (!isipv6) #endif ipov->ih_len = save; } #endif /* TCPDEBUG */ /* We're getting ready to send; log now. 
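For the checksum-offload setup above, csum_data records the offset of th_sum within the TCP header and th_sum itself is seeded with the pseudo-header sum, so the NIC (or the software fallback) only has to add the TCP header and payload bytes. The IPv4 pseudo-header seed is a plain ones-complement addition; the following is a minimal sketch of that sum, using host-order inputs for clarity rather than the kernel's in_pseudo() helper, and is an illustration rather than a drop-in replacement.

#include <stdint.h>

/*
 * Sketch of the IPv4 pseudo-header checksum seed: a ones-complement
 * sum over source address, destination address, protocol and TCP
 * length (header + options + payload).  The result is not inverted;
 * the final inversion happens once the header and payload words have
 * been added by hardware or by the software checksum path.
 */
static uint16_t
pseudo_hdr_sum_sketch(uint32_t src, uint32_t dst, uint8_t proto,
    uint16_t tcp_len)
{
	uint64_t sum = 0;

	sum += (src >> 16) + (src & 0xffff);	/* source address words */
	sum += (dst >> 16) + (dst & 0xffff);	/* destination address words */
	sum += proto;				/* zero byte + protocol byte */
	sum += tcp_len;				/* TCP length word */

	/* Fold carries back into 16 bits (ones-complement addition). */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}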
*/ if (tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt; if (rsm || sack_rxmit) { log.u_bbr.flex8 = 1; } else { log.u_bbr.flex8 = 0; } lgb = tcp_log_event_(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK, len, &log, false, NULL, NULL, 0, NULL); } else lgb = NULL; /* * Fill in IP length and desired time to live and send to IP level. * There should be a better way to handle ttl and tos; we could keep * them in the template, but need a way to checksum without them. */ /* * m->m_pkthdr.len should have been set before cksum calcuration, * because in6_cksum() need it. */ #ifdef INET6 if (isipv6) { /* * we separately set hoplimit for every segment, since the * user might want to change the value via setsockopt. Also, * desired default hop limit might be changed via Neighbor * Discovery. */ ip6->ip6_hlim = in6_selecthlim(inp, NULL); /* * Set the packet size here for the benefit of DTrace * probes. ip6_output() will set it properly; it's supposed * to include the option header lengths as well. */ ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) tp->t_flags2 |= TF2_PLPMTU_PMTUD; else tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; if (tp->t_state == TCPS_SYN_SENT) TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th); TCP_PROBE5(send, NULL, tp, ip6, tp, th); /* TODO: IPv6 IP6TOS_ECT bit on */ error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &inp->inp_route6, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL, NULL, inp); if (error == EMSGSIZE && inp->inp_route6.ro_rt != NULL) mtu = inp->inp_route6.ro_rt->rt_mtu; } #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip->ip_len = htons(m->m_pkthdr.len); #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) ip->ip_ttl = in6_selecthlim(inp, NULL); #endif /* INET6 */ /* * If we do path MTU discovery, then we set DF on every * packet. This might not be the best thing to do according * to RFC3390 Section 2. However the tcp hostcache migitates * the problem so it affects only the first tcp connection * with a host. * * NB: Don't set DF on small MTU/MSS to have a safe * fallback. */ if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) { tp->t_flags2 |= TF2_PLPMTU_PMTUD; if (tp->t_port == 0 || len < V_tcp_minmss) { ip->ip_off |= htons(IP_DF); } } else { tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; } if (tp->t_state == TCPS_SYN_SENT) TCP_PROBE5(connect__request, NULL, tp, ip, tp, th); TCP_PROBE5(send, NULL, tp, ip, tp, th); error = ip_output(m, tp->t_inpcb->inp_options, &inp->inp_route, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, inp); if (error == EMSGSIZE && inp->inp_route.ro_rt != NULL) mtu = inp->inp_route.ro_rt->rt_mtu; } #endif /* INET */ out: if (lgb) { lgb->tlb_errno = error; lgb = NULL; } /* * In transmit state, time the transmission and arrange for the * retransmit. In persist state, just set snd_max. 
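The IPv4 branch above only sets DF (don't fragment) when path MTU discovery is enabled and the MSS is still comfortably above the configured minimum, so a badly behaved path always has a fragmentable fallback. A small sketch of that decision with the tunables passed in; the helper is hypothetical, and the handling of the t_port (UDP tunneling) case is my reading of the NETFLIX_TCP_O_UDP code, folded into a single tunneled flag:

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the DF decision for an outgoing IPv4 TCP segment: returns
 * true when the segment should carry IP_DF.  PLPMTUD is only armed
 * while the MSS is above the configured minimum, and (on this reading)
 * tunneled segments only set DF when they are small enough to
 * retransmit cheaply.
 */
static bool
set_df_sketch(bool path_mtu_discovery, uint32_t maxseg, uint32_t minmss,
    bool tunneled, uint32_t payload_len)
{
	if (!path_mtu_discovery || maxseg <= minmss)
		return (false);		/* keep a safe, fragmentable fallback */
	if (!tunneled)
		return (true);
	return (payload_len < minmss);	/* assumed reading of the t_port check */
}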
*/ if (error == 0) { if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) tcp_clean_dsack_blocks(tp); if (len == 0) counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1); else if (len == 1) { counter_u64_add(rack_out_size[TCP_MSS_ACCT_PERSIST], 1); } else if (len > 1) { int idx; idx = (len / tp->t_maxseg) + 3; if (idx >= TCP_MSS_ACCT_ATIMER) counter_u64_add(rack_out_size[(TCP_MSS_ACCT_ATIMER-1)], 1); else counter_u64_add(rack_out_size[idx], 1); } } if (sub_from_prr && (error == 0)) { if (rack->r_ctl.rc_prr_sndcnt >= len) rack->r_ctl.rc_prr_sndcnt -= len; else rack->r_ctl.rc_prr_sndcnt = 0; } sub_from_prr = 0; rack_log_output(tp, &to, len, rack_seq, (uint8_t) flags, error, cts, pass, rsm); if ((tp->t_flags & TF_FORCEDATA) == 0 || (rack->rc_in_persist == 0)) { #ifdef NETFLIX_STATS tcp_seq startseq = tp->snd_nxt; #endif /* * Advance snd_nxt over sequence space of this segment. */ if (error) /* We don't log or do anything with errors */ goto timer; if (flags & (TH_SYN | TH_FIN)) { if (flags & TH_SYN) tp->snd_nxt++; if (flags & TH_FIN) { tp->snd_nxt++; tp->t_flags |= TF_SENTFIN; } } /* In the ENOBUFS case we do *not* update snd_max */ if (sack_rxmit) goto timer; tp->snd_nxt += len; if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { if (tp->snd_una == tp->snd_max) { /* * Update the time we just added data since * none was outstanding. */ rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__); tp->t_acktime = ticks; } tp->snd_max = tp->snd_nxt; #ifdef NETFLIX_STATS if (!(tp->t_flags & TF_GPUTINPROG) && len) { tp->t_flags |= TF_GPUTINPROG; tp->gput_seq = startseq; tp->gput_ack = startseq + ulmin(sbavail(sb) - sb_offset, sendwin); tp->gput_ts = tcp_ts_getticks(); } #endif } /* * Set retransmit timer if not currently set, and not doing * a pure ack or a keep-alive probe. Initial value for * retransmit timer is smoothed round-trip time + 2 * * round-trip time variance. Initialize shift counter which * is used for backoff of retransmit time. */ timer: if ((tp->snd_wnd == 0) && TCPS_HAVEESTABLISHED(tp->t_state)) { /* * If the persists timer was set above (right before * the goto send), and still needs to be on. Lets * make sure all is canceled. If the persist timer * is not running, we want to get it up. */ if (rack->rc_in_persist == 0) { rack_enter_persist(tp, rack, cts); } } } else { /* * Persist case, update snd_max but since we are in persist * mode (no window) we do not update snd_nxt. */ int32_t xlen = len; if (error) goto nomore; if (flags & TH_SYN) ++xlen; if (flags & TH_FIN) { ++xlen; tp->t_flags |= TF_SENTFIN; } /* In the ENOBUFS case we do *not* update snd_max */ if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) { if (tp->snd_una == tp->snd_max) { /* * Update the time we just added data since * none was outstanding. */ rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__); tp->t_acktime = ticks; } tp->snd_max = tp->snd_nxt + len; } } nomore: if (error) { SOCKBUF_UNLOCK_ASSERT(sb); /* Check gotos. */ /* * Failures do not advance the seq counter above. For the * case of ENOBUFS we will fall out and retry in 1ms with * the hpts. Everything else will just have to retransmit * with the timer. * * In any case, we do not want to loop around for another * send without a good reason. 
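The sequence-number bookkeeping after a successful send is worth spelling out: SYN and FIN each occupy one unit of sequence space in addition to the payload bytes, and snd_max only moves forward when the send actually extended beyond it. A compact, standalone sketch of that advance follows; the struct and macro are hypothetical stand-ins for the tcpcb fields and the SEQ_GT macro.

#include <stdbool.h>
#include <stdint.h>

/* Minimal stand-ins for the connection state touched here. */
struct seq_state {
	uint32_t snd_nxt;	/* next sequence number to send */
	uint32_t snd_max;	/* highest sequence number sent so far */
};

#define SEQ_GT_SK(a, b)	((int32_t)((a) - (b)) > 0)	/* wrap-safe compare */

/*
 * Sketch: advance snd_nxt over the sequence space consumed by this
 * segment and pull snd_max forward if new data was sent.  Retransmits,
 * which resend old sequence space, skip this entirely.
 */
static void
advance_seq_sketch(struct seq_state *s, uint32_t len, bool syn, bool fin)
{
	uint32_t consumed = len + (syn ? 1 : 0) + (fin ? 1 : 0);

	s->snd_nxt += consumed;
	if (SEQ_GT_SK(s->snd_nxt, s->snd_max))
		s->snd_max = s->snd_nxt;
}

A pure ACK (len 0, no SYN or FIN) advances nothing, which is why the header-building code earlier fell back to snd_max for its sequence number.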
*/ sendalot = 0; switch (error) { case EPERM: tp->t_flags &= ~TF_FORCEDATA; tp->t_softerror = error; return (error); case ENOBUFS: if (slot == 0) { /* * Pace us right away to retry in a some * time */ slot = 1 + rack->rc_enobuf; if (rack->rc_enobuf < 255) rack->rc_enobuf++; if (slot > (rack->rc_rack_rtt / 2)) { slot = rack->rc_rack_rtt / 2; } if (slot < 10) slot = 10; } counter_u64_add(rack_saw_enobuf, 1); error = 0; goto enobufs; case EMSGSIZE: /* * For some reason the interface we used initially * to send segments changed to another or lowered * its MTU. If TSO was active we either got an * interface without TSO capabilits or TSO was * turned off. If we obtained mtu from ip_output() * then update it and try again. */ if (tso) tp->t_flags &= ~TF_TSO; if (mtu != 0) { tcp_mss_update(tp, -1, mtu, NULL, NULL); goto again; } slot = 10; rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, 0, 1); tp->t_flags &= ~TF_FORCEDATA; return (error); case ENETUNREACH: counter_u64_add(rack_saw_enetunreach, 1); case EHOSTDOWN: case EHOSTUNREACH: case ENETDOWN: if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_softerror = error; } /* FALLTHROUGH */ default: slot = 10; rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, 0, 1); tp->t_flags &= ~TF_FORCEDATA; return (error); } } else { rack->rc_enobuf = 0; } TCPSTAT_INC(tcps_sndtotal); /* * Data sent (as far as we can tell). If this advertises a larger * window than any other segment, then remember the size of the * advertised window. Any pending ACK has now been sent. */ if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + recwin; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); enobufs: rack->r_tlp_running = 0; if ((flags & TH_RST) || (would_have_fin == 1)) { /* * We don't send again after a RST. We also do *not* send * again if we would have had a find, but now have * outstanding data. */ slot = 0; sendalot = 0; } if (slot) { /* set the rack tcb into the slot N */ counter_u64_add(rack_paced_segments, 1); } else if (sendalot) { if (len) counter_u64_add(rack_unpaced_segments, 1); sack_rxmit = 0; tp->t_flags &= ~TF_FORCEDATA; goto again; } else if (len) { counter_u64_add(rack_unpaced_segments, 1); } tp->t_flags &= ~TF_FORCEDATA; rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, tot_len_this_send, 1); return (error); } /* * rack_ctloutput() must drop the inpcb lock before performing copyin on * socket option arguments. When it re-acquires the lock after the copy, it * has to revalidate that the connection is still valid for the socket * option. 
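The ENOBUFS handling above is effectively a bounded backoff: each consecutive allocation failure pushes the retry slot out a little further, capped at half the measured RTT and floored at 10 in the pacer's time units. A standalone sketch of that backoff, assuming the same units (hypothetical helper):

#include <stdint.h>

/*
 * Sketch of the ENOBUFS retry backoff: enobuf_count is how many
 * consecutive ENOBUFS failures have been seen (saturating at 255),
 * rack_rtt is the measured RTT in pacer units.  Returns the slot to
 * wait before retrying and bumps the failure counter.
 */
static uint32_t
enobufs_backoff_sketch(uint8_t *enobuf_count, uint32_t rack_rtt)
{
	uint32_t slot;

	slot = 1 + *enobuf_count;
	if (*enobuf_count < 255)
		(*enobuf_count)++;
	if (slot > rack_rtt / 2)	/* never wait longer than half an RTT */
		slot = rack_rtt / 2;
	if (slot < 10)			/* ...but always wait a little while */
		slot = 10;
	return (slot);
}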
*/ static int rack_set_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack) { int32_t error = 0, optval; switch (sopt->sopt_name) { case TCP_RACK_PROP_RATE: case TCP_RACK_PROP: case TCP_RACK_TLP_REDUCE: case TCP_RACK_EARLY_RECOV: case TCP_RACK_PACE_ALWAYS: case TCP_DELACK: case TCP_RACK_PACE_REDUCE: case TCP_RACK_PACE_MAX_SEG: case TCP_RACK_PRR_SENDALOT: case TCP_RACK_MIN_TO: case TCP_RACK_EARLY_SEG: case TCP_RACK_REORD_THRESH: case TCP_RACK_REORD_FADE: case TCP_RACK_TLP_THRESH: case TCP_RACK_PKT_DELAY: case TCP_RACK_TLP_USE: case TCP_RACK_TLP_INC_VAR: case TCP_RACK_IDLE_REDUCE_HIGH: case TCP_RACK_MIN_PACE: case TCP_RACK_MIN_PACE_SEG: case TCP_BBR_RACK_RTT_USE: case TCP_DATA_AFTER_CLOSE: break; default: return (tcp_default_ctloutput(so, sopt, inp, tp)); break; } INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) return (error); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); rack = (struct tcp_rack *)tp->t_fb_ptr; switch (sopt->sopt_name) { case TCP_RACK_PROP_RATE: if ((optval <= 0) || (optval >= 100)) { error = EINVAL; break; } RACK_OPTS_INC(tcp_rack_prop_rate); rack->r_ctl.rc_prop_rate = optval; break; case TCP_RACK_TLP_USE: if ((optval < TLP_USE_ID) || (optval > TLP_USE_TWO_TWO)) { error = EINVAL; break; } RACK_OPTS_INC(tcp_tlp_use); rack->rack_tlp_threshold_use = optval; break; case TCP_RACK_PROP: /* RACK proportional rate reduction (bool) */ RACK_OPTS_INC(tcp_rack_prop); rack->r_ctl.rc_prop_reduce = optval; break; case TCP_RACK_TLP_REDUCE: /* RACK TLP cwnd reduction (bool) */ RACK_OPTS_INC(tcp_rack_tlp_reduce); rack->r_ctl.rc_tlp_cwnd_reduce = optval; break; case TCP_RACK_EARLY_RECOV: /* Should recovery happen early (bool) */ RACK_OPTS_INC(tcp_rack_early_recov); rack->r_ctl.rc_early_recovery = optval; break; case TCP_RACK_PACE_ALWAYS: /* Use the always pace method (bool) */ RACK_OPTS_INC(tcp_rack_pace_always); if (optval > 0) rack->rc_always_pace = 1; else rack->rc_always_pace = 0; break; case TCP_RACK_PACE_REDUCE: /* RACK Hptsi reduction factor (divisor) */ RACK_OPTS_INC(tcp_rack_pace_reduce); if (optval) /* Must be non-zero */ rack->rc_pace_reduce = optval; else error = EINVAL; break; case TCP_RACK_PACE_MAX_SEG: /* Max segments in a pace */ RACK_OPTS_INC(tcp_rack_max_seg); rack->rc_pace_max_segs = optval; break; case TCP_RACK_PRR_SENDALOT: /* Allow PRR to send more than one seg */ RACK_OPTS_INC(tcp_rack_prr_sendalot); rack->r_ctl.rc_prr_sendalot = optval; break; case TCP_RACK_MIN_TO: /* Minimum time between rack t-o's in ms */ RACK_OPTS_INC(tcp_rack_min_to); rack->r_ctl.rc_min_to = optval; break; case TCP_RACK_EARLY_SEG: /* If early recovery max segments */ RACK_OPTS_INC(tcp_rack_early_seg); rack->r_ctl.rc_early_recovery_segs = optval; break; case TCP_RACK_REORD_THRESH: /* RACK reorder threshold (shift amount) */ RACK_OPTS_INC(tcp_rack_reord_thresh); if ((optval > 0) && (optval < 31)) rack->r_ctl.rc_reorder_shift = optval; else error = EINVAL; break; case TCP_RACK_REORD_FADE: /* Does reordering fade after ms time */ RACK_OPTS_INC(tcp_rack_reord_fade); rack->r_ctl.rc_reorder_fade = optval; break; case TCP_RACK_TLP_THRESH: /* RACK TLP theshold i.e. srtt+(srtt/N) */ RACK_OPTS_INC(tcp_rack_tlp_thresh); if (optval) rack->r_ctl.rc_tlp_threshold = optval; else error = EINVAL; break; case TCP_RACK_PKT_DELAY: /* RACK added ms i.e. 
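The pattern in rack_set_sockopt() is the important part of this routine: the inpcb lock must be dropped before sooptcopyin(), which may fault or sleep, and once the lock is re-taken nothing learned before the drop can be trusted, so the connection is re-validated (INP_TIMEWAIT/INP_DROPPED) and tp and rack are re-derived before the option is applied. The following is a schematic sketch of that shape; every type and function in it is a hypothetical stand-in defined as a stub purely to make the control flow visible.

#include <stdbool.h>
#include <stdint.h>

struct conn {
	bool	locked;
	bool	dead;
	int32_t	value;
};

static void conn_lock(struct conn *c)   { c->locked = true;  }
static void conn_unlock(struct conn *c) { c->locked = false; }

/* Stands in for sooptcopyin(): may sleep, so no connection lock held. */
static bool
copy_in_option(int32_t *dst, int32_t user_value)
{
	*dst = user_value;
	return (true);
}

/* Sketch of drop-lock / copyin / revalidate / apply. */
static int
set_option_sketch(struct conn *c, int32_t user_value)
{
	int32_t optval;

	conn_unlock(c);				/* copyin may sleep or fault */
	if (!copy_in_option(&optval, user_value))
		return (-1);
	conn_lock(c);
	if (c->dead) {				/* revalidate after relocking */
		conn_unlock(c);
		return (-1);
	}
	c->value = optval;			/* only now apply the option */
	return (0);
}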
rack-rtt + reord + N */ RACK_OPTS_INC(tcp_rack_pkt_delay); rack->r_ctl.rc_pkt_delay = optval; break; case TCP_RACK_TLP_INC_VAR: /* Does TLP include rtt variance in t-o */ RACK_OPTS_INC(tcp_rack_tlp_inc_var); rack->r_ctl.rc_prr_inc_var = optval; break; case TCP_RACK_IDLE_REDUCE_HIGH: RACK_OPTS_INC(tcp_rack_idle_reduce_high); if (optval) rack->r_idle_reduce_largest = 1; else rack->r_idle_reduce_largest = 0; break; case TCP_DELACK: if (optval == 0) tp->t_delayed_ack = 0; else tp->t_delayed_ack = 1; if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tp->t_flags |= TF_ACKNOW; rack_output(tp); } break; case TCP_RACK_MIN_PACE: RACK_OPTS_INC(tcp_rack_min_pace); if (optval > 3) rack->r_enforce_min_pace = 3; else rack->r_enforce_min_pace = optval; break; case TCP_RACK_MIN_PACE_SEG: RACK_OPTS_INC(tcp_rack_min_pace_seg); if (optval >= 16) rack->r_min_pace_seg_thresh = 15; else rack->r_min_pace_seg_thresh = optval; break; case TCP_BBR_RACK_RTT_USE: if ((optval != USE_RTT_HIGH) && (optval != USE_RTT_LOW) && (optval != USE_RTT_AVG)) error = EINVAL; else rack->r_ctl.rc_rate_sample_method = optval; break; case TCP_DATA_AFTER_CLOSE: if (optval) rack->rc_allow_data_af_clo = 1; else rack->rc_allow_data_af_clo = 0; break; default: return (tcp_default_ctloutput(so, sopt, inp, tp)); break; } /* tcp_log_socket_option(tp, sopt->sopt_name, optval, error);*/ INP_WUNLOCK(inp); return (error); } static int rack_get_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack) { int32_t error, optval; /* * Because all our options are either boolean or an int, we can just * pull everything into optval and then unlock and copy. If we ever * add a option that is not a int, then this will have quite an * impact to this routine. */ switch (sopt->sopt_name) { case TCP_RACK_PROP_RATE: optval = rack->r_ctl.rc_prop_rate; break; case TCP_RACK_PROP: /* RACK proportional rate reduction (bool) */ optval = rack->r_ctl.rc_prop_reduce; break; case TCP_RACK_TLP_REDUCE: /* RACK TLP cwnd reduction (bool) */ optval = rack->r_ctl.rc_tlp_cwnd_reduce; break; case TCP_RACK_EARLY_RECOV: /* Should recovery happen early (bool) */ optval = rack->r_ctl.rc_early_recovery; break; case TCP_RACK_PACE_REDUCE: /* RACK Hptsi reduction factor (divisor) */ optval = rack->rc_pace_reduce; break; case TCP_RACK_PACE_MAX_SEG: /* Max segments in a pace */ optval = rack->rc_pace_max_segs; break; case TCP_RACK_PACE_ALWAYS: /* Use the always pace method */ optval = rack->rc_always_pace; break; case TCP_RACK_PRR_SENDALOT: /* Allow PRR to send more than one seg */ optval = rack->r_ctl.rc_prr_sendalot; break; case TCP_RACK_MIN_TO: /* Minimum time between rack t-o's in ms */ optval = rack->r_ctl.rc_min_to; break; case TCP_RACK_EARLY_SEG: /* If early recovery max segments */ optval = rack->r_ctl.rc_early_recovery_segs; break; case TCP_RACK_REORD_THRESH: /* RACK reorder threshold (shift amount) */ optval = rack->r_ctl.rc_reorder_shift; break; case TCP_RACK_REORD_FADE: /* Does reordering fade after ms time */ optval = rack->r_ctl.rc_reorder_fade; break; case TCP_RACK_TLP_THRESH: /* RACK TLP theshold i.e. srtt+(srtt/N) */ optval = rack->r_ctl.rc_tlp_threshold; break; case TCP_RACK_PKT_DELAY: /* RACK added ms i.e. 
rack-rtt + reord + N */ optval = rack->r_ctl.rc_pkt_delay; break; case TCP_RACK_TLP_USE: optval = rack->rack_tlp_threshold_use; break; case TCP_RACK_TLP_INC_VAR: /* Does TLP include rtt variance in t-o */ optval = rack->r_ctl.rc_prr_inc_var; break; case TCP_RACK_IDLE_REDUCE_HIGH: optval = rack->r_idle_reduce_largest; break; case TCP_RACK_MIN_PACE: optval = rack->r_enforce_min_pace; break; case TCP_RACK_MIN_PACE_SEG: optval = rack->r_min_pace_seg_thresh; break; case TCP_BBR_RACK_RTT_USE: optval = rack->r_ctl.rc_rate_sample_method; break; case TCP_DELACK: optval = tp->t_delayed_ack; break; case TCP_DATA_AFTER_CLOSE: optval = rack->rc_allow_data_af_clo; break; default: return (tcp_default_ctloutput(so, sopt, inp, tp)); break; } INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); return (error); } static int rack_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp) { int32_t error = EINVAL; struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack == NULL) { /* Huh? */ goto out; } if (sopt->sopt_dir == SOPT_SET) { return (rack_set_sockopt(so, sopt, inp, tp, rack)); } else if (sopt->sopt_dir == SOPT_GET) { return (rack_get_sockopt(so, sopt, inp, tp, rack)); } out: INP_WUNLOCK(inp); return (error); } struct tcp_function_block __tcp_rack = { .tfb_tcp_block_name = __XSTRING(STACKNAME), .tfb_tcp_output = rack_output, .tfb_tcp_do_segment = rack_do_segment, .tfb_tcp_ctloutput = rack_ctloutput, .tfb_tcp_fb_init = rack_init, .tfb_tcp_fb_fini = rack_fini, .tfb_tcp_timer_stop_all = rack_stopall, .tfb_tcp_timer_activate = rack_timer_activate, .tfb_tcp_timer_active = rack_timer_active, .tfb_tcp_timer_stop = rack_timer_stop, .tfb_tcp_rexmit_tmr = rack_remxt_tmr, .tfb_tcp_handoff_ok = rack_handoff_ok }; static const char *rack_stack_names[] = { __XSTRING(STACKNAME), #ifdef STACKALIAS __XSTRING(STACKALIAS), #endif }; static int rack_ctor(void *mem, int32_t size, void *arg, int32_t how) { memset(mem, 0, size); return (0); } static void rack_dtor(void *mem, int32_t size, void *arg) { } static bool rack_mod_inited = false; static int tcp_addrack(module_t mod, int32_t type, void *data) { int32_t err = 0; int num_stacks; switch (type) { case MOD_LOAD: rack_zone = uma_zcreate(__XSTRING(MODNAME) "_map", sizeof(struct rack_sendmap), rack_ctor, rack_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); rack_pcb_zone = uma_zcreate(__XSTRING(MODNAME) "_pcb", sizeof(struct tcp_rack), rack_ctor, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); sysctl_ctx_init(&rack_sysctl_ctx); rack_sysctl_root = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_STATIC_CHILDREN(_net_inet_tcp), OID_AUTO, __XSTRING(STACKNAME), CTLFLAG_RW, 0, ""); if (rack_sysctl_root == NULL) { printf("Failed to add sysctl node\n"); err = EFAULT; goto free_uma; } rack_init_sysctls(); num_stacks = nitems(rack_stack_names); err = register_tcp_functions_as_names(&__tcp_rack, M_WAITOK, rack_stack_names, &num_stacks); if (err) { printf("Failed to register %s stack name for " "%s module\n", rack_stack_names[num_stacks], __XSTRING(MODNAME)); sysctl_ctx_free(&rack_sysctl_ctx); free_uma: uma_zdestroy(rack_zone); uma_zdestroy(rack_pcb_zone); rack_counter_destroy(); printf("Failed to register rack module -- err:%d\n", err); return (err); } rack_mod_inited = true; break; case MOD_QUIESCE: err = deregister_tcp_functions(&__tcp_rack, true, false); break; case MOD_UNLOAD: err = deregister_tcp_functions(&__tcp_rack, false, true); if (err == EBUSY) break; if (rack_mod_inited) { uma_zdestroy(rack_zone); uma_zdestroy(rack_pcb_zone); 
sysctl_ctx_free(&rack_sysctl_ctx); rack_counter_destroy(); rack_mod_inited = false; } err = 0; break; default: return (EOPNOTSUPP); } return (err); } static moduledata_t tcp_rack = { .name = __XSTRING(MODNAME), .evhand = tcp_addrack, .priv = 0 }; MODULE_VERSION(MODNAME, 1); DECLARE_MODULE(MODNAME, tcp_rack, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); MODULE_DEPEND(MODNAME, tcphpts, 1, 1, 1); Index: projects/clang900-import/sys/netinet6/ip6_input.c =================================================================== --- projects/clang900-import/sys/netinet6/ip6_input.c (revision 352536) +++ projects/clang900-import/sys/netinet6/ip6_input.c (revision 352537) @@ -1,1861 +1,1863 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; u_char ip6_protox[IPPROTO_MAX]; VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl); VNET_DEFINE(u_long, in6_ifaddrhmask); static struct netisr_handler ip6_nh = { .nh_name = "ip6", .nh_handler = ip6_input, .nh_proto = NETISR_IPV6, #ifdef RSS .nh_m2cpuid = rss_soft_m2cpuid_v6, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, #else .nh_policy = NETISR_POLICY_FLOW, #endif }; static int sysctl_netinet6_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip6_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip6_nh, qlimit)); } SYSCTL_DECL(_net_inet6_ip6); SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRQMAXLEN, intr_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_queue_maxlen, "I", "Maximum size of the IPv6 input queue"); #ifdef RSS static struct netisr_handler ip6_direct_nh = { .nh_name = "ip6_direct", .nh_handler = ip6_direct_input, .nh_proto = NETISR_IPV6_DIRECT, .nh_m2cpuid = rss_soft_m2cpuid_v6, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, }; static int sysctl_netinet6_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip6_direct_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip6_direct_nh, qlimit)); } SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_direct_queue_maxlen, "I", "Maximum size of the IPv6 direct input queue"); #endif VNET_DEFINE(pfil_head_t, inet6_pfil_head); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); VNET_PCPUSTAT_SYSINIT(ip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ip6stat); #endif /* VIMAGE */ struct rmlock in6_ifaddr_lock; RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #ifdef PULLDOWN_TEST static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); #endif /* * IP6 initialization: fill in IP6 protocol switch table. * All protocols not implemented in kernel go to raw IP6 protocol handler. 
*/ void ip6_init(void) { struct pfil_head_args args; struct protosw *pr; int i; TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", &V_ip6_auto_linklocal); TUNABLE_INT_FETCH("net.inet6.ip6.accept_rtadv", &V_ip6_accept_rtadv); TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr); CK_STAILQ_INIT(&V_in6_ifaddrhead); V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR, &V_in6_ifaddrhmask); /* Initialize packet filter hooks. */ args.pa_version = PFIL_VERSION; args.pa_flags = PFIL_IN | PFIL_OUT; args.pa_type = PFIL_TYPE_IP6; args.pa_headname = PFIL_INET6_NAME; V_inet6_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register input helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET6, &V_ipsec_hhh_out[HHOOK_IPSEC_INET6], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register output helper hook\n", __func__); scope6_init(); addrsel_policy_init(); nd6_init(); frag6_init(); V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; /* Skip global initialization stuff for non-default instances. */ #ifdef VIMAGE if (!IS_DEFAULT_VNET(curvnet)) { netisr_register_vnet(&ip6_nh); #ifdef RSS netisr_register_vnet(&ip6_direct_nh); #endif return; } #endif pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) panic("ip6_init"); /* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */ for (i = 0; i < IPPROTO_MAX; i++) ip6_protox[i] = pr - inet6sw; /* * Cycle through IP protocols and put them into the appropriate place * in ip6_protox[]. */ for (pr = inet6domain.dom_protosw; pr < inet6domain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ if (pr->pr_protocol < IPPROTO_MAX) ip6_protox[pr->pr_protocol] = pr - inet6sw; } netisr_register(&ip6_nh); #ifdef RSS netisr_register(&ip6_direct_nh); #endif } /* * The protocol to be inserted into ip6_protox[] must be already registered * in inet6sw[], either statically or through pf_proto_register(). */ int ip6proto_register(short ip6proto) { struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */ return (EEXIST); /* * Find the protocol position in inet6sw[] and set the index. */ for (pr = inet6domain.dom_protosw; pr < inet6domain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol == ip6proto) { ip6_protox[pr->pr_protocol] = pr - inet6sw; return (0); } } return (EPROTONOSUPPORT); } int ip6proto_unregister(short ip6proto) { struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */ return (ENOENT); /* Reset the protocol slot to IPPROTO_RAW. 
*/ ip6_protox[ip6proto] = pr - inet6sw; return (0); } #ifdef VIMAGE static void ip6_destroy(void *unused __unused) { struct ifaddr *ifa, *nifa; struct ifnet *ifp; int error; #ifdef RSS netisr_unregister_vnet(&ip6_direct_nh); #endif netisr_unregister_vnet(&ip6_nh); pfil_head_unregister(V_inet6_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET6: " "error %d returned\n", __func__, error); } error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister output helper hook " "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: " "error %d returned\n", __func__, error); } /* Cleanup addresses. */ IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* Cannot lock here - lock recursion. */ /* IF_ADDR_LOCK(ifp); */ CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } /* IF_ADDR_UNLOCK(ifp); */ in6_ifdetach_destroy(ifp); mld_domifdetach(ifp); /* Make sure any routes are gone as well. */ rt_flushifroutes_af(ifp, AF_INET6); } IFNET_RUNLOCK(); nd6_destroy(); in6_ifattach_destroy(); hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask); } VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); #endif static int -ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off, +ip6_input_hbh(struct mbuf **mp, uint32_t *plen, uint32_t *rtalert, int *off, int *nxt, int *ours) { + struct mbuf *m; struct ip6_hdr *ip6; struct ip6_hbh *hbh; - if (ip6_hopopts_input(plen, rtalert, &m, off)) { + if (ip6_hopopts_input(plen, rtalert, mp, off)) { #if 0 /*touches NULL pointer*/ - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + in6_ifstat_inc((*mp)->m_pkthdr.rcvif, ifs6_in_discard); #endif goto out; /* m have already been freed */ } /* adjust pointer */ + m = *mp; ip6 = mtod(m, struct ip6_hdr *); /* * if the payload length field is 0 and the next header field * indicates Hop-by-Hop Options header, then a Jumbo Payload * option MUST be included. */ if (ip6->ip6_plen == 0 && *plen == 0) { /* * Note that if a valid jumbo payload option is * contained, ip6_hopopts_input() must set a valid * (non-zero) payload length to the variable plen. */ IP6STAT_INC(ip6s_badoptions); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); goto out; } #ifndef PULLDOWN_TEST /* ip6_hopopts_input() ensures that mbuf is contiguous */ hbh = (struct ip6_hbh *)(ip6 + 1); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { IP6STAT_INC(ip6s_tooshort); goto out; } #endif *nxt = hbh->ip6h_nxt; /* * If we are acting as a router and the packet contains a * router alert option, see if we know the option value. * Currently, we only support the option value for MLD, in which * case we should pass the packet to the multicast routing * daemon. */ if (*rtalert != ~0) { switch (*rtalert) { case IP6OPT_RTALERT_MLD: if (V_ip6_forwarding) *ours = 1; break; default: /* * RFC2711 requires unrecognized values must be * silently ignored. */ break; } } return (0); out: return (1); } #ifdef RSS /* * IPv6 direct input routine. 
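The signature change to ip6_input_hbh() in this revision exists because ip6_hopopts_input() may replace the mbuf it is handed (for example by pulling header bytes into a new chain); with a plain struct mbuf * the caller in ip6_input() could keep dereferencing a stale pointer after the helper returns. The general shape of the fix, reduced to a hypothetical buffer type with libc allocation so it is self-contained:

#include <stdlib.h>
#include <string.h>

/*
 * Sketch of why the callee takes a double pointer: a helper that may
 * replace the buffer it is given must hand the new pointer back, or
 * the caller keeps using freed memory.  'struct buf' is a hypothetical
 * stand-in for struct mbuf.
 */
struct buf {
	unsigned char	*data;
	size_t		 len;
};

static int
pullup_sketch(struct buf **bp, size_t need)
{
	struct buf *old = *bp, *n;

	if (old->len >= need)
		return (0);			/* already contiguous enough */

	/* Build a replacement buffer; the old head is freed below. */
	n = malloc(sizeof(*n));
	if (n == NULL)
		return (-1);
	n->data = calloc(1, need);
	if (n->data == NULL) {
		free(n);
		return (-1);
	}
	memcpy(n->data, old->data, old->len);
	n->len = need;
	free(old->data);
	free(old);
	*bp = n;	/* caller must reload its local pointer after this */
	return (0);
}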
* * This is called when reinjecting completed fragments where * all of the previous checking and book-keeping has been done. */ void ip6_direct_input(struct mbuf *m) { int off, nxt; int nest; struct m_tag *mtag; struct ip6_direct_ctx *ip6dc; mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL); KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!")); ip6dc = (struct ip6_direct_ctx *)(mtag + 1); nxt = ip6dc->ip6dc_nxt; off = ip6dc->ip6dc_off; nest = 0; m_tag_delete(m, mtag); while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); goto bad; } #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv6)) { if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) return; } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: m_freem(m); } #endif void ip6_input(struct mbuf *m) { struct in6_addr odst; struct ip6_hdr *ip6; struct in6_ifaddr *ia; struct ifnet *rcvif; u_int32_t plen; u_int32_t rtalert = ~0; int off = sizeof(struct ip6_hdr), nest; int nxt, ours = 0; int srcrt = 0; /* * Drop the packet if IPv6 operation is disabled on the interface. */ rcvif = m->m_pkthdr.rcvif; if ((ND_IFINFO(rcvif)->flags & ND6_IFF_IFDISABLED)) goto bad; #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * should the inner packet be considered authentic? * see comment in ah4_input(). * NB: m cannot be NULL when passed to the input routine */ m->m_flags &= ~M_AUTHIPHDR; m->m_flags &= ~M_AUTHIPDGM; #endif /* IPSEC */ if (m->m_flags & M_FASTFWD_OURS) { /* * Firewall changed destination to local. */ ip6 = mtod(m, struct ip6_hdr *); goto passin; } /* * mbuf statistics */ if (m->m_flags & M_EXT) { if (m->m_next) IP6STAT_INC(ip6s_mext2m); else IP6STAT_INC(ip6s_mext1); } else { if (m->m_next) { if (m->m_flags & M_LOOP) { IP6STAT_INC(ip6s_m2m[V_loif->if_index]); } else if (rcvif->if_index < IP6S_M2MMAX) IP6STAT_INC(ip6s_m2m[rcvif->if_index]); else IP6STAT_INC(ip6s_m2m[0]); } else IP6STAT_INC(ip6s_m1); } in6_ifstat_inc(rcvif, ifs6_in_receive); IP6STAT_INC(ip6s_total); #ifndef PULLDOWN_TEST /* * L2 bridge code and some other code can return mbuf chain * that does not conform to KAME requirement. too bad. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? */ if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; if (m->m_pkthdr.len > MHLEN) n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) goto bad; m_move_pkthdr(n, m); m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = n->m_pkthdr.len; m_freem(m); m = n; } IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */); #endif if (m->m_len < sizeof(struct ip6_hdr)) { if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(rcvif, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(rcvif, ifs6_in_hdrerr); goto bad; } IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]); IP_PROBE(receive, NULL, NULL, ip6, rcvif, NULL, ip6); /* * Check against address spoofing/corruption. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { /* * XXX: "badscope" is not very suitable for a multicast source. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && !(m->m_flags & M_LOOP)) { /* * In this case, the packet should come from the loopback * interface. However, we cannot just check the if_flags, * because ip6_mloopback() passes the "actual" interface * as the outgoing/incoming interface. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && IPV6_ADDR_MC_SCOPE(&ip6->ip6_dst) == 0) { /* * RFC4291 2.7: * Nodes must not originate a packet to a multicast address * whose scop field contains the reserved value 0; if such * a packet is received, it must be silently dropped. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } #ifdef ALTQ if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) { /* packet is dropped by traffic conditioner */ return; } #endif /* * The following check is not documented in specs. A malicious * party may be able to use IPv4 mapped addr to confuse tcp/udp stack * and bypass security checks (act as if it was from 127.0.0.1 by using * IPv6 src ::ffff:127.0.0.1). Be cautious. * * This check chokes if we are in an SIIT cloud. As none of BSDs * support IPv4-less kernel compilation, we cannot support SIIT * environment at all. So, it makes more sense for us to reject any * malicious packets for non-SIIT environment, than try to do a * partial support for SIIT environment. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } #if 0 /* * Reject packets with IPv4 compatible addresses (auto tunnel). * * The code forbids auto tunnel relay case in RFC1933 (the check is * stronger than RFC1933). We may want to re-enable it if mech-xx * is revised to forbid relaying case. */ if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } #endif /* * Try to forward the packet, but if we fail continue. * ip6_tryforward() does not generate redirects, so fall * through to normal processing if redirects are required. * ip6_tryforward() does inbound and outbound packet firewall * processing. If firewall has decided that destination becomes * our local address, it sets M_FASTFWD_OURS flag. In this * case skip another inbound firewall processing and update * ip6 pointer. */ if (V_ip6_forwarding != 0 && V_ip6_sendredirects == 0 #if defined(IPSEC) || defined(IPSEC_SUPPORT) && (!IPSEC_ENABLED(ipv6) || IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0) #endif ) { if ((m = ip6_tryforward(m)) == NULL) return; if (m->m_flags & M_FASTFWD_OURS) { ip6 = mtod(m, struct ip6_hdr *); goto passin; } } #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. */ if (IPSEC_ENABLED(ipv6) && IPSEC_CAPS(ipv6, m, IPSEC_CAP_BYPASS_FILTER) != 0) goto passin; #endif /* * Run through list of hooks for input packets. * * NB: Beware of the destination address changing * (e.g. by NAT rewriting). When this happens, * tell ip6_forward to do the right thing. */ /* Jump over all PFIL processing if hooks are not active. 
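The V4-mapped rejection above matters because a source of ::ffff:127.0.0.1 could otherwise masquerade as local IPv4 traffic and slip past address-based checks. The test itself is a simple prefix comparison; here is a standalone sketch (the in-tree IN6_IS_ADDR_V4MAPPED macro works on struct in6_addr, this hypothetical helper on a raw 16-byte array):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/*
 * Sketch of the IPv4-mapped IPv6 address test: the address has the
 * form ::ffff:a.b.c.d, i.e. 80 zero bits, 16 one bits, then the IPv4
 * address.  Takes the address as 16 bytes in network order.
 */
static bool
is_v4mapped_sketch(const uint8_t addr[16])
{
	static const uint8_t prefix[12] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff
	};

	return (memcmp(addr, prefix, sizeof(prefix)) == 0);
}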
*/ if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; odst = ip6->ip6_dst; if (pfil_run_hooks(V_inet6_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) != PFIL_PASS) return; ip6 = mtod(m, struct ip6_hdr *); srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); if ((m->m_flags & (M_IP6_NEXTHOP | M_FASTFWD_OURS)) == M_IP6_NEXTHOP && m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { /* * Directly ship the packet on. This allows forwarding * packets originally destined to us to some other directly * connected host. */ ip6_forward(m, 1); return; } passin: /* * Disambiguate address scope zones (if there is ambiguity). * We first make sure that the original source or destination address * is not in our internal form for scoped addresses. Such addresses * are not necessarily invalid spec-wise, but we cannot accept them due * to the usage conflict. * in6_setscope() then also checks and rejects the cases where src or * dst are the loopback address and the receiving interface * is not loopback. */ if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); /* XXX */ goto bad; } if (in6_setscope(&ip6->ip6_src, rcvif, NULL) || in6_setscope(&ip6->ip6_dst, rcvif, NULL)) { IP6STAT_INC(ip6s_badscope); goto bad; } if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ours = 1; goto hbhcheck; } /* * Multicast check. Assume packet is for us to avoid * prematurely taking locks. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ours = 1; in6_ifstat_inc(rcvif, ifs6_in_mcast); goto hbhcheck; } /* * Unicast check * XXX: For now we keep link-local IPv6 addresses with embedded * scope zone id, therefore we use zero zoneid here. */ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL) { if (ia->ia6_flags & IN6_IFF_NOTREADY) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; /* address is not ready, so discard the packet. */ nd6log((LOG_INFO, "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst))); ifa_free(&ia->ia_ifa); goto bad; } /* Count the packet in the ip address stats */ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); ifa_free(&ia->ia_ifa); ours = 1; goto hbhcheck; } /* * Now there is no reason to process the packet if it's not our own * and we're not a router. */ if (!V_ip6_forwarding) { IP6STAT_INC(ip6s_cantforward); goto bad; } hbhcheck: /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). * If a JumboPayload option is included, plen will also be modified. */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { - if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0) + if (ip6_input_hbh(&m, &plen, &rtalert, &off, &nxt, &ours) != 0) return; } else nxt = ip6->ip6_nxt; /* * Use mbuf flags to propagate Router Alert option to * ICMPv6 layer, as hop-by-hop options have been stripped. */ if (rtalert != ~0) m->m_flags |= M_RTALERT_MLD; /* * Check that the amount of data in the buffers * is as at least much as the IPv6 header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. 
*/ if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(rcvif, ifs6_in_truncated); goto bad; } if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { if (m->m_len == m->m_pkthdr.len) { m->m_len = sizeof(struct ip6_hdr) + plen; m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; } else m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); } /* * Forward if desirable. */ if (V_ip6_mrouter && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip6_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. * * XXX TODO: Check hlim and multicast scope here to avoid * unnecessarily calling into ip6_mforward(). */ if (ip6_mforward && ip6_mforward(ip6, rcvif, m)) { IP6STAT_INC(ip6s_cantforward); goto bad; } } else if (!ours) { ip6_forward(m, srcrt); return; } ip6 = mtod(m, struct ip6_hdr *); /* * Malicious party may be able to use IPv4 mapped addr to confuse * tcp/udp stack and bypass security checks (act as if it was from * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious. * * For SIIT end node behavior, you may want to disable the check. * However, you will become vulnerable to attacks using IPv4 mapped * source. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } /* * Tell launch routine the next header */ IP6STAT_INC(ip6s_delivered); in6_ifstat_inc(rcvif, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(rcvif, ifs6_in_truncated); goto bad; } #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv6)) { if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) return; } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: in6_ifstat_inc(rcvif, ifs6_in_discard); if (m != NULL) m_freem(m); } /* * Hop-by-Hop options header processing. If a valid jumbo payload option is * included, the real payload length will be stored in plenp. 
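The length reconciliation above (drop if the chain is shorter than the IPv6 payload length claims, trim if it is longer) is a small but security-relevant step, because later header parsing trusts plen rather than the real chain length. A sketch of the same decision on plain integers (hypothetical helper; it assumes the packet length is at least the fixed header size, which earlier validation guarantees):

#include <stdint.h>

#define IP6_HDR_LEN_SK	40U	/* fixed IPv6 header size */

enum trim_action { PKT_DROP, PKT_KEEP, PKT_TRIM };

/*
 * Sketch: compare the actual packet length against the length the
 * IPv6 header (or a jumbo option) advertises.  Returns what to do
 * and, for PKT_TRIM, how many trailing bytes to cut off.
 */
static enum trim_action
reconcile_len_sketch(uint32_t pkt_len, uint32_t plen, uint32_t *trim)
{
	*trim = 0;
	/* pkt_len >= IP6_HDR_LEN_SK is assumed to hold here. */
	if (pkt_len - IP6_HDR_LEN_SK < plen)
		return (PKT_DROP);		/* shorter than advertised */
	if (pkt_len > IP6_HDR_LEN_SK + plen) {
		*trim = pkt_len - (IP6_HDR_LEN_SK + plen);
		return (PKT_TRIM);		/* trailing junk to chop off */
	}
	return (PKT_KEEP);
}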
* * rtalertp - XXX: should be stored more smart way */ static int ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp, struct mbuf **mp, int *offp) { struct mbuf *m = *mp; int off = *offp, hbhlen; struct ip6_hbh *hbh; /* validation of the length of the header */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_CHECK(m, off, hbhlen, -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { IP6STAT_INC(ip6s_tooshort); return -1; } hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), hbhlen); if (hbh == NULL) { IP6STAT_INC(ip6s_tooshort); return -1; } #endif off += hbhlen; hbhlen -= sizeof(struct ip6_hbh); if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), hbhlen, rtalertp, plenp) < 0) return (-1); *offp = off; *mp = m; return (0); } /* * Search header for all Hop-by-hop options and process each option. * This function is separate from ip6_hopopts_input() in order to * handle a case where the sending node itself process its hop-by-hop * options header. In such a case, the function is called from ip6_output(). * * The function assumes that hbh header is located right after the IPv6 header * (RFC2460 p7), opthead is pointer into data content in m, and opthead to * opthead + hbhlen is located in contiguous memory region. */ int ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen, u_int32_t *rtalertp, u_int32_t *plenp) { struct ip6_hdr *ip6; int optlen = 0; u_int8_t *opt = opthead; u_int16_t rtalert_val; u_int32_t jumboplen; const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh); for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) { switch (*opt) { case IP6OPT_PAD1: optlen = 1; break; case IP6OPT_PADN: if (hbhlen < IP6OPT_MINLEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } optlen = *(opt + 1) + 2; break; case IP6OPT_ROUTER_ALERT: /* XXX may need check for alignment */ if (hbhlen < IP6OPT_RTALERT_LEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) { /* XXX stat */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); return (-1); } optlen = IP6OPT_RTALERT_LEN; bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2); *rtalertp = ntohs(rtalert_val); break; case IP6OPT_JUMBO: /* XXX may need check for alignment */ if (hbhlen < IP6OPT_JUMBO_LEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) { /* XXX stat */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); return (-1); } optlen = IP6OPT_JUMBO_LEN; /* * IPv6 packets that have non 0 payload length * must not contain a jumbo payload option. */ ip6 = mtod(m, struct ip6_hdr *); if (ip6->ip6_plen) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt - opthead); return (-1); } /* * We may see jumbolen in unaligned location, so * we'd need to perform bcopy(). */ bcopy(opt + 2, &jumboplen, sizeof(jumboplen)); jumboplen = (u_int32_t)htonl(jumboplen); #if 1 /* * if there are multiple jumbo payload options, * *plenp will be non-zero and the packet will be * rejected. * the behavior may need some debate in ipngwg - * multiple options does not make sense, however, * there's no explicit mention in specification. 
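Hop-by-hop option processing is a TLV walk in which only Pad1 lacks a length byte; every other option is type, length, then data, and the loop must bound-check the remaining space before reading the length. A reduced sketch of that walk, handling just the padding cases and treating everything else as an opaque option (hypothetical helper; the real loop above also dispatches the router-alert and jumbo-payload options):

#include <stddef.h>
#include <stdint.h>

#define OPT_PAD1	0x00	/* single zero byte, no length field */
#define OPT_PADN	0x01	/* type, length, then padding bytes */

/*
 * Sketch of the hop-by-hop TLV walk: returns 0 if the option area
 * parses cleanly, -1 on a malformed (truncated) option.
 */
static int
walk_hbh_options_sketch(const uint8_t *opt, size_t len)
{
	while (len > 0) {
		if (opt[0] == OPT_PAD1) {	/* only option without a length */
			opt++;
			len--;
			continue;
		}
		if (len < 2)			/* no room for type + length */
			return (-1);
		size_t optlen = (size_t)opt[1] + 2;
		if (optlen > len)		/* option runs past the header */
			return (-1);
		/*
		 * PadN and unrecognized options are simply skipped here;
		 * the kernel dispatches on the type (router alert, jumbo).
		 */
		opt += optlen;
		len -= optlen;
	}
	return (0);
}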
*/ if (*plenp != 0) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); return (-1); } #endif /* * jumbo payload length must be larger than 65535. */ if (jumboplen <= IPV6_MAXPACKET) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); return (-1); } *plenp = jumboplen; break; default: /* unknown option */ if (hbhlen < IP6OPT_MINLEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } optlen = ip6_unknown_opt(opt, m, erroff + opt - opthead); if (optlen == -1) return (-1); optlen += 2; break; } } return (0); bad: m_freem(m); return (-1); } /* * Unknown option processing. * The third argument `off' is the offset from the IPv6 header to the option, * which is necessary if the IPv6 header the and option header and IPv6 header * is not contiguous in order to return an ICMPv6 error. */ int ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off) { struct ip6_hdr *ip6; switch (IP6OPT_TYPE(*optp)) { case IP6OPT_TYPE_SKIP: /* ignore the option */ return ((int)*(optp + 1)); case IP6OPT_TYPE_DISCARD: /* silently discard */ m_freem(m); return (-1); case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return (-1); case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ IP6STAT_INC(ip6s_badoptions); ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || (m->m_flags & (M_BCAST|M_MCAST))) m_freem(m); else icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return (-1); } m_freem(m); /* XXX: NOTREACHED */ return (-1); } /* * Create the "control" list for this pcb. * These functions will not modify mbuf chain at all. * * With KAME mbuf chain restriction: * The routine will be called from upper layer handlers like tcp6_input(). * Thus the routine assumes that the caller (tcp6_input) have already * called IP6_EXTHDR_CHECK() and all the extension headers are located in the * very first mbuf on the mbuf chain. * * ip6_savecontrol_v4 will handle those options that are possible to be * set on a v4-mapped socket. * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those * options and handle the v6-only ones itself. 
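ip6_unknown_opt() relies on the fact that the two high-order bits of an option type encode how an unrecognized option must be handled: skip it, discard silently, discard and always send an ICMPv6 Parameter Problem, or discard and send the ICMP error only for non-multicast destinations (RFC 2460 / RFC 8200, section 4.2). A small sketch of that decode; the enum names are my own:

#include <stdint.h>

enum unknown_opt_action {
	OPT_SKIP,		/* 00: skip over, keep processing */
	OPT_DISCARD,		/* 01: discard silently */
	OPT_DISCARD_ICMP,	/* 10: discard, always send ICMP param problem */
	OPT_DISCARD_ICMP_UCAST	/* 11: discard, ICMP only if not multicast */
};

/*
 * Sketch of the unrecognized-option action decode: the action lives
 * in the two most significant bits of the option type.
 */
static enum unknown_opt_action
unknown_opt_action_sketch(uint8_t opt_type)
{
	switch (opt_type >> 6) {
	case 0:
		return (OPT_SKIP);
	case 1:
		return (OPT_DISCARD);
	case 2:
		return (OPT_DISCARD_ICMP);
	default:
		return (OPT_DISCARD_ICMP_UCAST);
	}
}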
*/ struct mbuf ** ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, int *v4only) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); #ifdef SO_TIMESTAMP if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { union { struct timeval tv; struct bintime bt; struct timespec ts; } t; struct bintime boottimebin, bt1; struct timespec ts1; bool stamped; stamped = false; switch (inp->inp_socket->so_ts_clock) { case SO_TS_REALTIME_MICRO: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &ts1); timespec2bintime(&ts1, &bt1); getboottimebin(&boottimebin); bintime_add(&bt1, &boottimebin); bintime2timeval(&bt1, &t.tv); } else { microtime(&t.tv); } *mp = sbcreatecontrol((caddr_t) &t.tv, sizeof(t.tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } break; case SO_TS_BINTIME: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &ts1); timespec2bintime(&ts1, &t.bt); getboottimebin(&boottimebin); bintime_add(&t.bt, &boottimebin); } else { bintime(&t.bt); } *mp = sbcreatecontrol((caddr_t)&t.bt, sizeof(t.bt), SCM_BINTIME, SOL_SOCKET); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } break; case SO_TS_REALTIME: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &t.ts); getboottimebin(&boottimebin); bintime2timespec(&boottimebin, &ts1); timespecadd(&t.ts, &ts1, &t.ts); } else { nanotime(&t.ts); } *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), SCM_REALTIME, SOL_SOCKET); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } break; case SO_TS_MONOTONIC: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) mbuf_tstmp2timespec(m, &t.ts); else nanouptime(&t.ts); *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), SCM_MONOTONIC, SOL_SOCKET); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } break; default: panic("unknown (corrupted) so_ts_clock"); } if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { struct sock_timestamp_info sti; bzero(&sti, sizeof(sti)); sti.st_info_flags = ST_INFO_HW; if ((m->m_flags & M_TSTMP_HPREC) != 0) sti.st_info_flags |= ST_INFO_HW_HPREC; *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, SOL_SOCKET); if (*mp != NULL) mp = &(*mp)->m_next; } } #endif #define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) /* RFC 2292 sec. 5 */ if ((inp->inp_flags & IN6P_PKTINFO) != 0) { struct in6_pktinfo pi6; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); pi6.ipi6_addr.s6_addr32[0] = 0; pi6.ipi6_addr.s6_addr32[1] = 0; pi6.ipi6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; pi6.ipi6_addr.s6_addr32[3] = ip->ip_dst.s_addr; #else /* We won't hit this code */ bzero(&pi6.ipi6_addr, sizeof(struct in6_addr)); #endif } else { bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); in6_clearscope(&pi6.ipi6_addr); /* XXX */ } pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif) ? 
m->m_pkthdr.rcvif->if_index : 0; *mp = sbcreatecontrol((caddr_t) &pi6, sizeof(struct in6_pktinfo), IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) { int hlim; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); hlim = ip->ip_ttl; #else /* We won't hit this code */ hlim = 0; #endif } else { hlim = ip6->ip6_hlim & 0xff; } *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if ((inp->inp_flags & IN6P_TCLASS) != 0) { int tclass; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); tclass = ip->ip_tos; #else /* We won't hit this code */ tclass = 0; #endif } else { u_int32_t flowinfo; flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK); flowinfo >>= 20; tclass = flowinfo & 0xff; } *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(int), IPV6_TCLASS, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if (v4only != NULL) { if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { *v4only = 1; } else { *v4only = 0; } } return (mp); } void ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); int v4only = 0; mp = ip6_savecontrol_v4(inp, m, mp, &v4only); if (v4only) return; /* * IPV6_HOPOPTS socket option. Recall that we required super-user * privilege for the option (see ip6_ctloutput), but it might be too * strict, since there might be some hop-by-hop options which can be * returned to normal user. * See also RFC 2292 section 6 (or RFC 3542 section 8). */ if ((inp->inp_flags & IN6P_HOPOPTS) != 0) { /* * Check if a hop-by-hop options header is contatined in the * received packet, and if so, store the options as ancillary * data. Note that a hop-by-hop options header must be * just after the IPv6 header, which is assured through the * IPv6 input processing. */ if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; int hbhlen = 0; #ifdef PULLDOWN_TEST struct mbuf *ext; #endif #ifndef PULLDOWN_TEST hbh = (struct ip6_hbh *)(ip6 + 1); hbhlen = (hbh->ip6h_len + 1) << 3; #else ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), ip6->ip6_nxt); if (ext == NULL) { IP6STAT_INC(ip6s_tooshort); return; } hbh = mtod(ext, struct ip6_hbh *); hbhlen = (hbh->ip6h_len + 1) << 3; if (hbhlen != ext->m_len) { m_freem(ext); IP6STAT_INC(ip6s_tooshort); return; } #endif /* * XXX: We copy the whole header even if a * jumbo payload option is included, the option which * is to be removed before returning according to * RFC2292. * Note: this constraint is removed in RFC3542 */ *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, IS2292(inp, IPV6_2292HOPOPTS, IPV6_HOPOPTS), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; #ifdef PULLDOWN_TEST m_freem(ext); #endif } } if ((inp->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); /* * Search for destination options headers or routing * header(s) through the header chain, and stores each * header as ancillary data. * Note that the order of the headers remains in * the chain of ancillary data. */ while (1) { /* is explicit loop prevention necessary? */ struct ip6_ext *ip6e = NULL; int elen; #ifdef PULLDOWN_TEST struct mbuf *ext = NULL; #endif /* * if it is not an extension header, don't try to * pull it from the chain. 
*/ switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: goto loopend; } #ifndef PULLDOWN_TEST if (off + sizeof(*ip6e) > m->m_len) goto loopend; ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (off + elen > m->m_len) goto loopend; #else ext = ip6_pullexthdr(m, off, nxt); if (ext == NULL) { IP6STAT_INC(ip6s_tooshort); return; } ip6e = mtod(ext, struct ip6_ext *); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (elen != ext->m_len) { m_freem(ext); IP6STAT_INC(ip6s_tooshort); return; } #endif switch (nxt) { case IPPROTO_DSTOPTS: if (!(inp->inp_flags & IN6P_DSTOPTS)) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IS2292(inp, IPV6_2292DSTOPTS, IPV6_DSTOPTS), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_ROUTING: if (!(inp->inp_flags & IN6P_RTHDR)) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IS2292(inp, IPV6_2292RTHDR, IPV6_RTHDR), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: /* * other cases have been filtered in the above. * none will visit this case. here we supply * the code just in case (nxt overwritten or * other cases). */ #ifdef PULLDOWN_TEST m_freem(ext); #endif goto loopend; } /* proceed with the next header. */ off += elen; nxt = ip6e->ip6e_nxt; ip6e = NULL; #ifdef PULLDOWN_TEST m_freem(ext); ext = NULL; #endif } loopend: ; } if (inp->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); /* * XXX should handle the failure of one or the * other - don't populate both? */ *mp = sbcreatecontrol((caddr_t) &flowid, sizeof(uint32_t), IPV6_FLOWID, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; *mp = sbcreatecontrol((caddr_t) &flow_type, sizeof(uint32_t), IPV6_FLOWTYPE, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } #ifdef RSS if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { *mp = sbcreatecontrol((caddr_t) &rss_bucketid, sizeof(uint32_t), IPV6_RSSBUCKETID, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } } #endif } #undef IS2292 void ip6_notify_pmtu(struct inpcb *inp, struct sockaddr_in6 *dst, u_int32_t mtu) { struct socket *so; struct mbuf *m_mtu; struct ip6_mtuinfo mtuctl; KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); /* * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. * NOTE: we notify disconnected sockets, because some udp * applications keep sending sockets disconnected. * NOTE: our implementation doesn't notify connected sockets that has * foreign address that is different than given destination addresses * (this is permitted by RFC 3542). 
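/*
 * Editor's note -- illustrative only: ip6_notify_pmtu() below surfaces to
 * applications as an IPV6_PATHMTU control message once IPV6_RECVPATHMTU has
 * been enabled on the socket; the payload is the same struct ip6_mtuinfo the
 * kernel fills in.  Per RFC 3542 the structure comes with <netinet/in.h>
 * (header location assumed here).  Receive-side check, reusing the
 * recvmsg()/CMSG loop shown earlier:
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>

static void
demo_check_pathmtu(const struct cmsghdr *cm)
{
	struct ip6_mtuinfo mtuinfo;

	if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PATHMTU) {
		memcpy(&mtuinfo, CMSG_DATA(cm), sizeof(mtuinfo));
		/* mtuinfo.ip6m_addr: destination; mtuinfo.ip6m_mtu: new MTU */
	}
}

/*
 * Enable delivery beforehand:
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IPV6, IPV6_RECVPATHMTU, &on, sizeof(on));
 */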
*/ if ((inp->inp_flags & IN6P_MTU) == 0 || ( !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &dst->sin6_addr))) return; mtuctl.ip6m_mtu = mtu; mtuctl.ip6m_addr = *dst; if (sa6_recoverscope(&mtuctl.ip6m_addr)) return; if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl), IPV6_PATHMTU, IPPROTO_IPV6)) == NULL) return; so = inp->inp_socket; if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu) == 0) { m_freem(m_mtu); /* XXX: should count statistics */ } else sorwakeup(so); } #ifdef PULLDOWN_TEST /* * pull single extension header from mbuf chain. returns single mbuf that * contains the result, or NULL on error. */ static struct mbuf * ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) { struct ip6_ext ip6e; size_t elen; struct mbuf *n; #ifdef DIAGNOSTIC switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: printf("ip6_pullexthdr: invalid nxt=%d\n", nxt); } #endif m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxt == IPPROTO_AH) elen = (ip6e.ip6e_len + 2) << 2; else elen = (ip6e.ip6e_len + 1) << 3; if (elen > MLEN) n = m_getcl(M_NOWAIT, MT_DATA, 0); else n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) return NULL; m_copydata(m, off, elen, mtod(n, caddr_t)); n->m_len = elen; return n; } #endif /* * Get pointer to the previous header followed by the header * currently processed. */ int ip6_get_prevhdr(const struct mbuf *m, int off) { struct ip6_ext ip6e; struct ip6_hdr *ip6; int len, nlen, nxt; if (off == sizeof(struct ip6_hdr)) return (offsetof(struct ip6_hdr, ip6_nxt)); if (off < sizeof(struct ip6_hdr)) panic("%s: off < sizeof(struct ip6_hdr)", __func__); ip6 = mtod(m, struct ip6_hdr *); nxt = ip6->ip6_nxt; len = sizeof(struct ip6_hdr); nlen = 0; while (len < off) { m_copydata(m, len, sizeof(ip6e), (caddr_t)&ip6e); switch (nxt) { case IPPROTO_FRAGMENT: nlen = sizeof(struct ip6_frag); break; case IPPROTO_AH: nlen = (ip6e.ip6e_len + 2) << 2; break; default: nlen = (ip6e.ip6e_len + 1) << 3; } len += nlen; nxt = ip6e.ip6e_nxt; } return (len - nlen); } /* * get next header offset. m will be retained. */ int ip6_nexthdr(const struct mbuf *m, int off, int proto, int *nxtp) { struct ip6_hdr ip6; struct ip6_ext ip6e; struct ip6_frag fh; /* just in case */ if (m == NULL) panic("ip6_nexthdr: m == NULL"); if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off) return -1; switch (proto) { case IPPROTO_IPV6: if (m->m_pkthdr.len < off + sizeof(ip6)) return -1; m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6); if (nxtp) *nxtp = ip6.ip6_nxt; off += sizeof(ip6); return off; case IPPROTO_FRAGMENT: /* * terminate parsing if it is not the first fragment, * it does not make sense to parse through it. 
*/ if (m->m_pkthdr.len < off + sizeof(fh)) return -1; m_copydata(m, off, sizeof(fh), (caddr_t)&fh); /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ if (fh.ip6f_offlg & IP6F_OFF_MASK) return -1; if (nxtp) *nxtp = fh.ip6f_nxt; off += sizeof(struct ip6_frag); return off; case IPPROTO_AH: if (m->m_pkthdr.len < off + sizeof(ip6e)) return -1; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 2) << 2; return off; case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: if (m->m_pkthdr.len < off + sizeof(ip6e)) return -1; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 1) << 3; return off; case IPPROTO_NONE: case IPPROTO_ESP: case IPPROTO_IPCOMP: /* give up */ return -1; default: return -1; } /* NOTREACHED */ } /* * get offset for the last header in the chain. m will be kept untainted. */ int ip6_lasthdr(const struct mbuf *m, int off, int proto, int *nxtp) { int newoff; int nxt; if (!nxtp) { nxt = -1; nxtp = &nxt; } while (1) { newoff = ip6_nexthdr(m, off, proto, nxtp); if (newoff < 0) return off; else if (newoff < off) return -1; /* invalid */ else if (newoff == off) return newoff; off = newoff; proto = *nxtp; } } /* * System control for IP6 */ u_char inet6ctlerrmap[PRC_NCMDS] = { 0, 0, 0, 0, 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, EMSGSIZE, EHOSTUNREACH, 0, 0, 0, 0, EHOSTUNREACH, 0, ENOPROTOOPT, ECONNREFUSED }; Index: projects/clang900-import/sys/sys/pmc.h =================================================================== --- projects/clang900-import/sys/sys/pmc.h (revision 352536) +++ projects/clang900-import/sys/sys/pmc.h (revision 352537) @@ -1,1230 +1,1232 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #include #include #ifdef _KERNEL #include #include #endif #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ #define PMC_VERSION_MAJOR 0x09 #define PMC_VERSION_MINOR 0x03 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) #define PMC_CPUID_LEN 64 /* cpu model name for pmu lookup */ extern char pmc_cpuid[PMC_CPUID_LEN]; /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. */ #define __PMC_CPUS() \ __PMC_CPU(AMD_K7, 0x00, "AMD K7") \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(INTEL_P5, 0x80, "Intel Pentium") \ __PMC_CPU(INTEL_P6, 0x81, "Intel Pentium Pro") \ __PMC_CPU(INTEL_CL, 0x82, "Intel Celeron") \ __PMC_CPU(INTEL_PII, 0x83, "Intel Pentium II") \ __PMC_CPU(INTEL_PIII, 0x84, "Intel Pentium III") \ __PMC_CPU(INTEL_PM, 0x85, "Intel Pentium M") \ __PMC_CPU(INTEL_PIV, 0x86, "Intel Pentium IV") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_BROADWELL_XEON, 0x97, "Intel Broadwell Xeon") \ __PMC_CPU(INTEL_SKYLAKE, 0x98, "Intel Skylake") \ __PMC_CPU(INTEL_SKYLAKE_XEON, 0x99, "Intel Skylake Xeon") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ __PMC_CPU(MIPS_74K, 0x202, "MIPS 74K") \ + __PMC_CPU(MIPS_BERI, 0x203, "BERI") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(GENERIC, 0x400, "Generic") \ __PMC_CPU(ARMV7_CORTEX_A5, 0x500, "ARMv7 Cortex A5") \ __PMC_CPU(ARMV7_CORTEX_A7, 0x501, "ARMv7 Cortex A7") \ __PMC_CPU(ARMV7_CORTEX_A8, 0x502, "ARMv7 Cortex A8") \ __PMC_CPU(ARMV7_CORTEX_A9, 0x503, "ARMv7 Cortex A9") \ __PMC_CPU(ARMV7_CORTEX_A15, 0x504, "ARMv7 Cortex A15") \ __PMC_CPU(ARMV7_CORTEX_A17, 0x505, "ARMv7 Cortex A17") \ 
__PMC_CPU(ARMV8_CORTEX_A53, 0x600, "ARMv8 Cortex A53") \ __PMC_CPU(ARMV8_CORTEX_A57, 0x601, "ARMv8 Cortex A57") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K7 #define PMC_CPU_LAST PMC_CPU_GENERIC /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K7, 0x01, "AMD K7 performance counters") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \ __PMC_CLASS(P5, 0x03, "Intel Pentium counters") \ __PMC_CLASS(P6, 0x04, "Intel Pentium Pro counters") \ __PMC_CLASS(P4, 0x05, "Intel Pentium-IV counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ __PMC_CLASS(UCP, 0x09, "Intel Uncore programmable") \ __PMC_CLASS(XSCALE, 0x0A, "Intel XScale counters") \ __PMC_CLASS(MIPS24K, 0x0B, "MIPS 24K") \ __PMC_CLASS(OCTEON, 0x0C, "Cavium Octeon") \ __PMC_CLASS(PPC7450, 0x0D, "Motorola MPC7450 class") \ __PMC_CLASS(PPC970, 0x0E, "IBM PowerPC 970 class") \ __PMC_CLASS(SOFT, 0x0F, "Software events") \ __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \ - __PMC_CLASS(E500, 0x13, "Freescale e500 class") + __PMC_CLASS(E500, 0x13, "Freescale e500 class") \ + __PMC_CLASS(BERI, 0x14, "MIPS BERI") enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) PMC_CLASS_##S = V, __PMC_CLASSES() }; #define PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_E500 /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * a interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system. 
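/*
 * Editor's note -- illustrative only, and the libpmc prototypes below are
 * quoted from memory for roughly this vintage of the tree; consult pmc(3)
 * for the authoritative interface, and note that the event name string is
 * platform-dependent.  The four modes above map directly onto what userland
 * asks for, e.g. a thread-virtual counting PMC (PMC_MODE_TC) measuring the
 * calling process:
 */
#include <pmc.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static void
demo_count_instructions(void)
{
	pmc_id_t pmcid;
	pmc_value_t v;

	if (pmc_init() < 0)
		return;
	if (pmc_allocate("instructions", PMC_MODE_TC, 0, PMC_CPU_ANY,
	    &pmcid, 0) < 0)
		return;
	pmc_attach(pmcid, 0);		/* pid 0: attach to ourselves */
	pmc_start(pmcid);
	/* ... workload being measured ... */
	pmc_stop(pmcid);
	pmc_read(pmcid, &v);
	printf("instructions: %ju\n", (uintmax_t)v);
	pmc_release(pmcid);
}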
*/ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparison sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_CASCADE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. 
*/ #define PMC_F_UNUSED1 0x00000001 /* unused */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ #define PMC_F_USERCALLCHAIN 0x00000100 /*OP ALLOCATE use userspace stack */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 512 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * * +-----------------------+-------+-----------+ * | CPU | PMC MODE | CLASS | ROW INDEX | * +-----------------------+-------+-----------+ * * where CPU is 12 bits, MODE 8, CLASS 4, and ROW INDEX 8 Field 'CPU' * is set to the requested CPU for system-wide PMCs or PMC_CPU_ANY for * process-mode PMCs. Field 'PMC MODE' is the allocated PMC mode. * Field 'PMC CLASS' is the class of the PMC. Field 'ROW INDEX' is the * row index for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) #define PMC_ID_TO_CLASS(ID) (((ID) & 0xF00) >> 8) #define PMC_ID_TO_MODE(ID) (((ID) & 0xFF000) >> 12) #define PMC_ID_TO_CPU(ID) (((ID) & 0xFFF00000) >> 20) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ ((((CPU) & 0xFFF) << 20) | (((MODE) & 0xFF) << 12) | \ (((CLASS) & 0xF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ pmc_value_t pm_count; /* initial/sample count */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 
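/*
 * Editor's note -- illustrative only: a round trip through the PMC ID
 * macros defined above, showing how CPU, mode, class and row index are
 * packed into the 32-bit pmc_id_t (12/8/4/8 bits respectively).
 */
static void
demo_pmc_id_roundtrip(void)
{
	pmc_id_t id;

	id = PMC_ID_MAKE_ID(/* cpu */ 2, PMC_MODE_SS, PMC_CLASS_IAP,
	    /* rowindex */ 3);

	/*
	 * PMC_ID_TO_CPU(id)      == 2
	 * PMC_ID_TO_MODE(id)     == PMC_MODE_SS
	 * PMC_ID_TO_CLASS(id)    == PMC_CLASS_IAP
	 * PMC_ID_TO_ROWINDEX(id) == 3
	 */
	(void)id;
}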
'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ #ifdef _KERNEL struct pmc_driverstats { counter_u64_t pm_intr_ignored; /* #interrupts ignored */ counter_u64_t pm_intr_processed; /* #interrupts processed */ counter_u64_t pm_intr_bufferfull; /* #interrupts with ENOSPC */ counter_u64_t pm_syscalls; /* #syscalls */ counter_u64_t pm_syscall_errors; /* #syscalls with errors */ counter_u64_t pm_buffer_requests; /* #buffer requests */ counter_u64_t pm_buffer_requests_failed; /* #failed buffer requests */ counter_u64_t pm_log_sweeps; /* #sample buffer processing passes */ counter_u64_t pm_merges; /* merged k+u */ counter_u64_t pm_overwrites; /* UR overwrites */ }; #endif struct pmc_op_getdriverstats { unsigned int pm_intr_ignored; /* #interrupts ignored */ unsigned int pm_intr_processed; /* #interrupts processed */ unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ unsigned int pm_syscalls; /* #syscalls */ unsigned int pm_syscall_errors; /* #syscalls with errors */ unsigned int pm_buffer_requests; /* #buffer requests */ unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ unsigned int pm_log_sweeps; /* #sample buffer processing passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address associated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. 
*/ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 256 #define PMC_NLOGBUFFERS_PCPU 32 #define PMC_NSAMPLES 256 #define PMC_CALLCHAIN_DEPTH 128 #define PMC_THREADLIST_MAX 128 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) * (g) - global_epoch_preempt (epoch) * (p) - pmc_sx (sx) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific s1tuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * get descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc_pcpu_state { uint8_t pps_stalled; uint8_t pps_cpustate; } __aligned(CACHE_LINE_SIZE); struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; struct pmc_pcpu_state *pm_pcpu_state; volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... 
*/ struct pmc_owner *pm_owner; /* owner thread state */ counter_u64_t pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ uint32_t pm_overflowcnt; /* count overflow interrupts */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC. */ pmc_id_t pm_id; /* allocated PMC id */ enum pmc_class pm_class; /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_threadpmcstate * * Record per-PMC, per-thread state. */ struct pmc_threadpmcstate { pmc_value_t pt_pmcval; /* per-thread reload count */ }; /* * struct pmc_thread * * Record a 'target' thread being profiled. */ struct pmc_thread { LIST_ENTRY(pmc_thread) pt_next; /* linked list */ struct thread *pt_td; /* target thread */ struct pmc_threadpmcstate pt_pmcs[]; /* per-PMC state */ }; /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ LIST_HEAD(,pmc_thread) pp_tds; /* list of threads */ struct mtx *pp_tdslock; /* lock on pp_tds thread list */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target process */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ CK_LIST_ENTRY(pmc_owner) po_ssnext; /* (g/p) list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ struct pmclog_buffer *po_curbuf[MAXCPU]; /* current log buffer */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. 
In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). * */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint16_t ps_nsamples_actual; uint16_t ps_cpu; /* cpu number */ uint16_t ps_flags; /* other flags */ lwpid_t ps_tid; /* thread id */ pid_t ps_pid; /* process PID or -1 */ int ps_ticks; /* ticks at sample time */ /* pad */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ uint64_t ps_tsc; /* tsc value */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_USER_CALLCHAIN_PENDING ((uint16_t) 0xFFFF) struct pmc_samplebuffer { volatile uint64_t ps_prodidx; /* producer index */ volatile uint64_t ps_considx; /* consumer index */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; #define PMC_CONS_SAMPLE(psb) \ (&(psb)->ps_samples[(psb)->ps_considx & pmc_sample_mask]) #define PMC_CONS_SAMPLE_OFF(psb, off) \ (&(psb)->ps_samples[(off) & pmc_sample_mask]) #define PMC_PROD_SAMPLE(psb) \ (&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask]) /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[3]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. 
*/ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri); int (*pcd_stop_pmc)(int _cpu, int _ri); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* per-cpu initialization and finalization */ int (*pmd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pmd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. 
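/*
 * Editor's note -- a purely hypothetical sketch, unrelated to any real
 * in-tree driver: a class driver typically fills one pmd_classdep[] slot of
 * its pmc_mdep with the class capabilities and method table, using the
 * function-pointer signatures declared above.
 */
static int example_read_pmc(int cpu, int ri, pmc_value_t *v);
static int example_write_pmc(int cpu, int ri, pmc_value_t v);
static int example_start_pmc(int cpu, int ri);
static int example_stop_pmc(int cpu, int ri);

static void
example_classdep_init(struct pmc_mdep *md, int ri)
{
	struct pmc_classdep *pcd;

	pcd = &md->pmd_classdep[0];	/* slot chosen by the MD code */
	pcd->pcd_caps  = PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT;
	pcd->pcd_class = PMC_CLASS_SOFT;	/* stand-in class id */
	pcd->pcd_num   = 2;			/* PMCs in this class */
	pcd->pcd_ri    = ri;			/* first row index */
	pcd->pcd_width = 64;
	pcd->pcd_read_pmc  = example_read_pmc;
	pcd->pcd_write_pmc = example_write_pmc;
	pcd->pcd_start_pmc = example_start_pmc;
	pcd->pcd_stop_pmc  = example_stop_pmc;
}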
*/ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_driverstats pmc_stats; #if defined(HWPMC_DEBUG) #include /* debug flags, major flag groups */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define KTR_PMC KTR_SUBSYS #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG0(M, N, L, F) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR0(KTR_PMC, #M ":" #N ":" #L ": " F); \ } while (0) #define PMCDBG1(M, N, L, F, p1) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR1(KTR_PMC, #M ":" #N ":" #L ": " F, p1); \ } while (0) #define PMCDBG2(M, N, L, F, p1, p2) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR2(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2); \ } while (0) #define PMCDBG3(M, N, L, F, p1, p2, p3) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR3(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3); \ } while (0) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR4(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4);\ } while (0) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR5(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5); \ } while (0) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR6(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5, p6); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... 
*/ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define PMCDBG0(M, N, L, F) /* nothing */ #define PMCDBG1(M, N, L, F, p1) #define PMCDBG2(M, N, L, F, p1, p2) #define PMCDBG3(M, N, L, F, p1, p2, p3) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _ring, struct pmc *_pm, struct trapframe *_tf); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); uint64_t pmc_rdtsc(void); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */ Index: projects/clang900-import/sys/sys/sockio.h =================================================================== --- projects/clang900-import/sys/sys/sockio.h (revision 352536) +++ projects/clang900-import/sys/sys/sockio.h (revision 352537) @@ -1,146 +1,148 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sockio.h 8.1 (Berkeley) 3/28/94 * $FreeBSD$ */ #ifndef _SYS_SOCKIO_H_ #define _SYS_SOCKIO_H_ #include /* Socket ioctl's. */ #define SIOCSHIWAT _IOW('s', 0, int) /* set high watermark */ #define SIOCGHIWAT _IOR('s', 1, int) /* get high watermark */ #define SIOCSLOWAT _IOW('s', 2, int) /* set low watermark */ #define SIOCGLOWAT _IOR('s', 3, int) /* get low watermark */ #define SIOCATMARK _IOR('s', 7, int) /* at oob mark? */ #define SIOCSPGRP _IOW('s', 8, int) /* set process group */ #define SIOCGPGRP _IOR('s', 9, int) /* get process group */ /* SIOCADDRT _IOW('r', 10, struct ortentry) 4.3BSD */ /* SIOCDELRT _IOW('r', 11, struct ortentry) 4.3BSD */ #define SIOCGETVIFCNT _IOWR('r', 15, struct sioc_vif_req)/* get vif pkt cnt */ #define SIOCGETSGCNT _IOWR('r', 16, struct sioc_sg_req) /* get s,g pkt cnt */ #define SIOCSIFADDR _IOW('i', 12, struct ifreq) /* set ifnet address */ /* OSIOCGIFADDR _IOWR('i', 13, struct ifreq) 4.3BSD */ #define SIOCGIFADDR _IOWR('i', 33, struct ifreq) /* get ifnet address */ #define SIOCSIFDSTADDR _IOW('i', 14, struct ifreq) /* set p-p address */ /* OSIOCGIFDSTADDR _IOWR('i', 15, struct ifreq) 4.3BSD */ #define SIOCGIFDSTADDR _IOWR('i', 34, struct ifreq) /* get p-p address */ #define SIOCSIFFLAGS _IOW('i', 16, struct ifreq) /* set ifnet flags */ #define SIOCGIFFLAGS _IOWR('i', 17, struct ifreq) /* get ifnet flags */ /* OSIOCGIFBRDADDR _IOWR('i', 18, struct ifreq) 4.3BSD */ #define SIOCGIFBRDADDR _IOWR('i', 35, struct ifreq) /* get broadcast addr */ #define SIOCSIFBRDADDR _IOW('i', 19, struct ifreq) /* set broadcast addr */ /* OSIOCGIFCONF _IOWR('i', 20, struct ifconf) 4.3BSD */ #define SIOCGIFCONF _IOWR('i', 36, struct ifconf) /* get ifnet list */ /* OSIOCGIFNETMASK _IOWR('i', 21, struct ifreq) 4.3BSD */ #define SIOCGIFNETMASK _IOWR('i', 37, struct ifreq) /* get net addr mask */ #define SIOCSIFNETMASK _IOW('i', 22, struct ifreq) /* set net addr mask */ #define SIOCGIFMETRIC _IOWR('i', 23, struct ifreq) /* get IF metric */ #define SIOCSIFMETRIC _IOW('i', 24, struct ifreq) /* set IF metric */ #define SIOCDIFADDR _IOW('i', 25, struct ifreq) /* delete IF addr */ #define OSIOCAIFADDR _IOW('i', 26, struct oifaliasreq) /* FreeBSD 9.x */ /* SIOCALIFADDR _IOW('i', 27, struct if_laddrreq) KAME */ /* SIOCGLIFADDR _IOWR('i', 28, struct if_laddrreq) KAME */ /* SIOCDLIFADDR _IOW('i', 29, struct if_laddrreq) KAME */ #define SIOCSIFCAP _IOW('i', 30, struct ifreq) /* set IF features */ #define SIOCGIFCAP _IOWR('i', 31, struct ifreq) /* get IF features */ #define SIOCGIFINDEX _IOWR('i', 32, struct ifreq) /* get IF index */ #define SIOCGIFMAC _IOWR('i', 38, struct ifreq) /* get IF MAC label */ #define SIOCSIFMAC _IOW('i', 39, struct ifreq) /* set IF MAC label */ #define SIOCSIFNAME _IOW('i', 40, struct ifreq) /* set IF name */ 
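/*
 * Editor's note -- illustrative only: the SIOC* requests in this list are
 * issued with ioctl(2) on an ordinary socket, passing a struct ifreq (or
 * the other structure noted for the request).  Minimal sketch reading an
 * interface MTU with SIOCGIFMTU:
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <string.h>
#include <unistd.h>

static int
demo_get_mtu(const char *ifname, int *mtu)
{
	struct ifreq ifr;
	int s, error;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return (-1);
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	error = ioctl(s, SIOCGIFMTU, &ifr);
	if (error == 0)
		*mtu = ifr.ifr_mtu;
	close(s);
	return (error);
}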
#define SIOCSIFDESCR _IOW('i', 41, struct ifreq) /* set ifnet descr */ #define SIOCGIFDESCR _IOWR('i', 42, struct ifreq) /* get ifnet descr */ #define SIOCAIFADDR _IOW('i', 43, struct ifaliasreq)/* add/chg IF alias */ #define SIOCADDMULTI _IOW('i', 49, struct ifreq) /* add m'cast addr */ #define SIOCDELMULTI _IOW('i', 50, struct ifreq) /* del m'cast addr */ #define SIOCGIFMTU _IOWR('i', 51, struct ifreq) /* get IF mtu */ #define SIOCSIFMTU _IOW('i', 52, struct ifreq) /* set IF mtu */ #define SIOCGIFPHYS _IOWR('i', 53, struct ifreq) /* get IF wire */ #define SIOCSIFPHYS _IOW('i', 54, struct ifreq) /* set IF wire */ #define SIOCSIFMEDIA _IOWR('i', 55, struct ifreq) /* set net media */ #define SIOCGIFMEDIA _IOWR('i', 56, struct ifmediareq) /* get net media */ #define SIOCSIFGENERIC _IOW('i', 57, struct ifreq) /* generic IF set op */ #define SIOCGIFGENERIC _IOWR('i', 58, struct ifreq) /* generic IF get op */ #define SIOCGIFSTATUS _IOWR('i', 59, struct ifstat) /* get IF status */ #define SIOCSIFLLADDR _IOW('i', 60, struct ifreq) /* set linklevel addr */ #define SIOCGI2C _IOWR('i', 61, struct ifreq) /* get I2C data */ #define SIOCGHWADDR _IOWR('i', 62, struct ifreq) /* get hardware lladdr */ #define SIOCSIFPHYADDR _IOW('i', 70, struct ifaliasreq) /* set gif address */ #define SIOCGIFPSRCADDR _IOWR('i', 71, struct ifreq) /* get gif psrc addr */ #define SIOCGIFPDSTADDR _IOWR('i', 72, struct ifreq) /* get gif pdst addr */ #define SIOCDIFPHYADDR _IOW('i', 73, struct ifreq) /* delete gif addrs */ /* SIOCSLIFPHYADDR _IOW('i', 74, struct if_laddrreq) KAME */ /* SIOCGLIFPHYADDR _IOWR('i', 75, struct if_laddrreq) KAME */ #define SIOCGPRIVATE_0 _IOWR('i', 80, struct ifreq) /* device private 0 */ #define SIOCGPRIVATE_1 _IOWR('i', 81, struct ifreq) /* device private 1 */ #define SIOCSIFVNET _IOWR('i', 90, struct ifreq) /* move IF jail/vnet */ #define SIOCSIFRVNET _IOWR('i', 91, struct ifreq) /* reclaim vnet IF */ #define SIOCGIFFIB _IOWR('i', 92, struct ifreq) /* get IF fib */ #define SIOCSIFFIB _IOW('i', 93, struct ifreq) /* set IF fib */ #define SIOCGTUNFIB _IOWR('i', 94, struct ifreq) /* get tunnel fib */ #define SIOCSTUNFIB _IOW('i', 95, struct ifreq) /* set tunnel fib */ #define SIOCSDRVSPEC _IOW('i', 123, struct ifdrv) /* set driver-specific parameters */ #define SIOCGDRVSPEC _IOWR('i', 123, struct ifdrv) /* get driver-specific parameters */ #define SIOCIFCREATE _IOWR('i', 122, struct ifreq) /* create clone if */ #define SIOCIFCREATE2 _IOWR('i', 124, struct ifreq) /* create clone if */ #define SIOCIFDESTROY _IOW('i', 121, struct ifreq) /* destroy clone if */ #define SIOCIFGCLONERS _IOWR('i', 120, struct if_clonereq) /* get cloners */ #define SIOCAIFGROUP _IOW('i', 135, struct ifgroupreq) /* add an ifgroup */ #define SIOCGIFGROUP _IOWR('i', 136, struct ifgroupreq) /* get ifgroups */ #define SIOCDIFGROUP _IOW('i', 137, struct ifgroupreq) /* delete ifgroup */ #define SIOCGIFGMEMB _IOWR('i', 138, struct ifgroupreq) /* get members */ #define SIOCGIFXMEDIA _IOWR('i', 139, struct ifmediareq) /* get net xmedia */ #define SIOCGIFRSSKEY _IOWR('i', 150, struct ifrsskey)/* get RSS key */ #define SIOCGIFRSSHASH _IOWR('i', 151, struct ifrsshash)/* get the current RSS type/func settings */ #define SIOCGLANPCP _IOWR('i', 152, struct ifreq) /* Get (V)LAN PCP */ #define SIOCSLANPCP _IOW('i', 153, struct ifreq) /* Set (V)LAN PCP */ +#define SIOCGIFDOWNREASON _IOWR('i', 154, struct ifdownreason) + #endif /* !_SYS_SOCKIO_H_ */ Index: projects/clang900-import/sys/sys/sysctl.h 
=================================================================== --- projects/clang900-import/sys/sys/sysctl.h (revision 352536) +++ projects/clang900-import/sys/sys/sysctl.h (revision 352537) @@ -1,1148 +1,1159 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sysctl.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_SYSCTL_H_ #define _SYS_SYSCTL_H_ #ifdef _KERNEL #include #endif struct thread; /* * Definitions for sysctl call. The sysctl call uses a hierarchical name * for objects that can be examined or modified. The name is expressed as * a sequence of integers. Like a file path name, the meaning of each * component depends on its place in the hierarchy. The top-level and kern * identifiers are defined here, and other identifiers are defined in the * respective subsystem header files. */ #define CTL_MAXNAME 24 /* largest number of components supported */ /* * Each subsystem defined by sysctl defines a list of variables * for that subsystem. Each name is either a node with further * levels defined below it, or it is a leaf of some particular * type given below. Each sysctl level defines a set of name/type * pairs to be used by sysctl(8) in manipulating the subsystem. 
*/ struct ctlname { char *ctl_name; /* subsystem name */ int ctl_type; /* type of name */ }; #define CTLTYPE 0xf /* mask for the type */ #define CTLTYPE_NODE 1 /* name is a node */ #define CTLTYPE_INT 2 /* name describes an integer */ #define CTLTYPE_STRING 3 /* name describes a string */ #define CTLTYPE_S64 4 /* name describes a signed 64-bit number */ #define CTLTYPE_OPAQUE 5 /* name describes a structure */ #define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */ #define CTLTYPE_UINT 6 /* name describes an unsigned integer */ #define CTLTYPE_LONG 7 /* name describes a long */ #define CTLTYPE_ULONG 8 /* name describes an unsigned long */ #define CTLTYPE_U64 9 /* name describes an unsigned 64-bit number */ #define CTLTYPE_U8 0xa /* name describes an unsigned 8-bit number */ #define CTLTYPE_U16 0xb /* name describes an unsigned 16-bit number */ #define CTLTYPE_S8 0xc /* name describes a signed 8-bit number */ #define CTLTYPE_S16 0xd /* name describes a signed 16-bit number */ #define CTLTYPE_S32 0xe /* name describes a signed 32-bit number */ #define CTLTYPE_U32 0xf /* name describes an unsigned 32-bit number */ #define CTLFLAG_RD 0x80000000 /* Allow reads of variable */ #define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */ #define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR) #define CTLFLAG_DORMANT 0x20000000 /* This sysctl is not active yet */ #define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */ #define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */ #define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */ #define CTLFLAG_DYN 0x02000000 /* Dynamic oid - can be freed */ #define CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */ #define CTLMASK_SECURE 0x00F00000 /* Secure level */ #define CTLFLAG_TUN 0x00080000 /* Default value is loaded from getenv() */ #define CTLFLAG_RDTUN (CTLFLAG_RD|CTLFLAG_TUN) #define CTLFLAG_RWTUN (CTLFLAG_RW|CTLFLAG_TUN) #define CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */ #define CTLFLAG_VNET 0x00020000 /* Prisons with vnet can fiddle */ #define CTLFLAG_DYING 0x00010000 /* Oid is being removed */ #define CTLFLAG_CAPRD 0x00008000 /* Can be read in capability mode */ #define CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */ #define CTLFLAG_STATS 0x00002000 /* Statistics, not a tuneable */ #define CTLFLAG_NOFETCH 0x00001000 /* Don't fetch tunable from getenv() */ #define CTLFLAG_CAPRW (CTLFLAG_CAPRD|CTLFLAG_CAPWR) /* * Secure level. Note that CTLFLAG_SECURE == CTLFLAG_SECURE1. * * Secure when the securelevel is raised to at least N. */ #define CTLSHIFT_SECURE 20 #define CTLFLAG_SECURE1 (CTLFLAG_SECURE | (0 << CTLSHIFT_SECURE)) #define CTLFLAG_SECURE2 (CTLFLAG_SECURE | (1 << CTLSHIFT_SECURE)) #define CTLFLAG_SECURE3 (CTLFLAG_SECURE | (2 << CTLSHIFT_SECURE)) /* * USE THIS instead of a hardwired number from the categories below * to get dynamically assigned sysctl entries using the linker-set * technology. This is the way nearly all new sysctl variables should * be implemented. * e.g. SYSCTL_INT(_parent, OID_AUTO, name, CTLFLAG_RW, &variable, 0, ""); */ #define OID_AUTO (-1) /* * The starting number for dynamically-assigned entries. WARNING! * ALL static sysctl entries should have numbers LESS than this! 
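/*
 * Minimal illustrative sketch, not part of this change: a static integer
 * sysctl declared with OID_AUTO and the flag macros above.  The parent
 * node _debug exists in the tree; the variable and oid name are
 * assumptions.  CTLFLAG_RWTUN makes the knob writable at run time and
 * also loadable from loader.conf as a tunable.
 */
static int example_verbose = 0;
SYSCTL_INT(_debug, OID_AUTO, example_verbose, CTLFLAG_RWTUN,
    &example_verbose, 0, "Example knob, settable as debug.example_verbose");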
*/ #define CTL_AUTO_START 0x100 #ifdef _KERNEL #include #ifdef KLD_MODULE /* XXX allow overspecification of type in external kernel modules */ #define SYSCTL_CT_ASSERT_MASK CTLTYPE #else #define SYSCTL_CT_ASSERT_MASK 0 #endif #define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \ intmax_t arg2, struct sysctl_req *req /* definitions for sysctl_req 'lock' member */ #define REQ_UNWIRED 1 #define REQ_WIRED 2 /* definitions for sysctl_req 'flags' member */ #if defined(__aarch64__) || defined(__amd64__) || defined(__powerpc64__) ||\ (defined(__mips__) && defined(__mips_n64)) #define SCTL_MASK32 1 /* 32 bit emulation */ #endif /* * This describes the access space for a sysctl request. This is needed * so that we can use the interface from the kernel or from user-space. */ struct sysctl_req { struct thread *td; /* used for access checking */ int lock; /* wiring state */ void *oldptr; size_t oldlen; size_t oldidx; int (*oldfunc)(struct sysctl_req *, const void *, size_t); const void *newptr; size_t newlen; size_t newidx; int (*newfunc)(struct sysctl_req *, void *, size_t); size_t validlen; int flags; }; SLIST_HEAD(sysctl_oid_list, sysctl_oid); /* * This describes one "oid" in the MIB tree. Potentially more nodes can * be hidden behind it, expanded by the handler. */ struct sysctl_oid { struct sysctl_oid_list oid_children; struct sysctl_oid_list *oid_parent; SLIST_ENTRY(sysctl_oid) oid_link; int oid_number; u_int oid_kind; void *oid_arg1; intmax_t oid_arg2; const char *oid_name; int (*oid_handler)(SYSCTL_HANDLER_ARGS); const char *oid_fmt; int oid_refcnt; u_int oid_running; const char *oid_descr; const char *oid_label; }; #define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l) #define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l) #define SYSCTL_OUT_STR(r, p) (r->oldfunc)(r, p, strlen(p) + 1) int sysctl_handle_bool(SYSCTL_HANDLER_ARGS); int sysctl_handle_8(SYSCTL_HANDLER_ARGS); int sysctl_handle_16(SYSCTL_HANDLER_ARGS); int sysctl_handle_32(SYSCTL_HANDLER_ARGS); int sysctl_handle_64(SYSCTL_HANDLER_ARGS); int sysctl_handle_int(SYSCTL_HANDLER_ARGS); int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS); int sysctl_handle_long(SYSCTL_HANDLER_ARGS); int sysctl_handle_string(SYSCTL_HANDLER_ARGS); int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS); int sysctl_handle_counter_u64(SYSCTL_HANDLER_ARGS); int sysctl_handle_counter_u64_array(SYSCTL_HANDLER_ARGS); int sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS); int sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS); int sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS); int sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS); int sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS); /* * These functions are used to add/remove an oid from the mib. */ void sysctl_register_oid(struct sysctl_oid *oidp); void sysctl_register_disabled_oid(struct sysctl_oid *oidp); void sysctl_enable_oid(struct sysctl_oid *oidp); void sysctl_unregister_oid(struct sysctl_oid *oidp); /* Declare a static oid to allow child oids to be added to it. */ #define SYSCTL_DECL(name) \ extern struct sysctl_oid sysctl__##name /* Hide these in macros. */ #define SYSCTL_CHILDREN(oid_ptr) (&(oid_ptr)->oid_children) #define SYSCTL_PARENT(oid_ptr) \ (((oid_ptr)->oid_parent != &sysctl__children) ? 
\ __containerof((oid_ptr)->oid_parent, struct sysctl_oid, \ oid_children) : (struct sysctl_oid *)NULL) #define SYSCTL_STATIC_CHILDREN(oid_name) (&sysctl__##oid_name.oid_children) /* === Structs and macros related to context handling. === */ /* All dynamically created sysctls can be tracked in a context list. */ struct sysctl_ctx_entry { struct sysctl_oid *entry; TAILQ_ENTRY(sysctl_ctx_entry) link; }; TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #define SYSCTL_NODE_CHILDREN(parent, name) \ sysctl__##parent##_##name.oid_children #ifndef NO_SYSCTL_DESCR #define __DESCR(d) d #else #define __DESCR(d) "" #endif /* This macro is only for internal use */ #define SYSCTL_OID_RAW(id, parent_child_head, nbr, name, kind, a1, a2, handler, fmt, descr, label) \ struct sysctl_oid id = { \ .oid_parent = (parent_child_head), \ .oid_children = SLIST_HEAD_INITIALIZER(&id.oid_children), \ .oid_number = (nbr), \ .oid_kind = (kind), \ .oid_arg1 = (a1), \ .oid_arg2 = (a2), \ .oid_name = (name), \ .oid_handler = (handler), \ .oid_fmt = (fmt), \ .oid_descr = __DESCR(descr), \ .oid_label = (label), \ }; \ DATA_SET(sysctl_set, id) /* This constructs a static "raw" MIB oid. */ #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ SYSCTL_OID_WITH_LABEL(parent, nbr, name, kind, a1, a2, \ handler, fmt, descr, NULL) #define SYSCTL_OID_WITH_LABEL(parent, nbr, name, kind, a1, a2, handler, fmt, descr, label) \ static SYSCTL_OID_RAW(sysctl__##parent##_##name, \ SYSCTL_CHILDREN(&sysctl__##parent), \ nbr, #name, kind, a1, a2, handler, fmt, descr, label) /* This constructs a global "raw" MIB oid. */ #define SYSCTL_OID_GLOBAL(parent, nbr, name, kind, a1, a2, handler, fmt, descr, label) \ SYSCTL_OID_RAW(sysctl__##parent##_##name, \ SYSCTL_CHILDREN(&sysctl__##parent), \ nbr, #name, kind, a1, a2, handler, fmt, descr, label) #define SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, __DESCR(descr), NULL) /* This constructs a root node from which other nodes can hang. */ #define SYSCTL_ROOT_NODE(nbr, name, access, handler, descr) \ SYSCTL_OID_RAW(sysctl___##name, &sysctl__children, \ nbr, #name, CTLTYPE_NODE|(access), NULL, 0, \ handler, "N", descr, NULL); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE) /* This constructs a node from which other oids can hang. 
*/ #define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \ SYSCTL_NODE_WITH_LABEL(parent, nbr, name, access, handler, descr, NULL) #define SYSCTL_NODE_WITH_LABEL(parent, nbr, name, access, handler, descr, label) \ SYSCTL_OID_GLOBAL(parent, nbr, name, CTLTYPE_NODE|(access), \ NULL, 0, handler, "N", descr, label); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE) #define SYSCTL_ADD_NODE(ctx, parent, nbr, name, access, handler, descr) \ SYSCTL_ADD_NODE_WITH_LABEL(ctx, parent, nbr, name, access, \ handler, descr, NULL) #define SYSCTL_ADD_NODE_WITH_LABEL(ctx, parent, nbr, name, access, handler, descr, label) \ ({ \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE); \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_NODE|(access), \ NULL, 0, handler, "N", __DESCR(descr), label); \ }) #define SYSCTL_ADD_ROOT_NODE(ctx, nbr, name, access, handler, descr) \ ({ \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE); \ sysctl_add_oid(ctx, &sysctl__children, nbr, name, \ CTLTYPE_NODE|(access), \ NULL, 0, handler, "N", __DESCR(descr), NULL); \ }) /* Oid for a string. len can be 0 to indicate '\0' termination. */ #define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \ arg, len, sysctl_handle_string, "A", descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING) #define SYSCTL_ADD_STRING(ctx, parent, nbr, name, access, arg, len, descr) \ ({ \ char *__arg = (arg); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING); \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access), \ __arg, len, sysctl_handle_string, "A", __DESCR(descr), \ NULL); \ }) /* Oid for a constant '\0' terminated string. */ #define SYSCTL_CONST_STRING(parent, nbr, name, access, arg, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \ __DECONST(char *, arg), 0, sysctl_handle_string, "A", descr); \ CTASSERT(!(access & CTLFLAG_WR)); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING) #define SYSCTL_ADD_CONST_STRING(ctx, parent, nbr, name, access, arg, descr) \ ({ \ char *__arg = __DECONST(char *, arg); \ CTASSERT(!(access & CTLFLAG_WR)); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING); \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access), \ __arg, 0, sysctl_handle_string, "A", __DESCR(descr), \ NULL); \ }) /* Oid for a bool. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_BOOL_PTR ((bool *)NULL) #define SYSCTL_BOOL(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_bool, "CU", descr); \ CTASSERT(((access) & CTLTYPE) == 0 && \ sizeof(bool) == sizeof(*(ptr))) #define SYSCTL_ADD_BOOL(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ bool *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_bool, "CU", __DESCR(descr), \ NULL); \ }) /* Oid for a signed 8-bit int. If ptr is NULL, val is returned. 
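/*
 * Illustrative sketch, not part of this change: creating a node with
 * SYSCTL_NODE() and hanging a read-only string oid under it with
 * SYSCTL_STRING() from above.  All names are assumptions for the example.
 */
SYSCTL_NODE(_hw, OID_AUTO, example, CTLFLAG_RD, NULL,
    "Example device parameters");
static char example_version[] = "1.0";
SYSCTL_STRING(_hw_example, OID_AUTO, version, CTLFLAG_RD,
    example_version, 0, "Example firmware revision string");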
*/ #define SYSCTL_NULL_S8_PTR ((int8_t *)NULL) #define SYSCTL_S8(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_S8 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_8, "C", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S8) && \ sizeof(int8_t) == sizeof(*(ptr))) #define SYSCTL_ADD_S8(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ int8_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S8); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_S8 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_8, "C", __DESCR(descr), NULL); \ }) /* Oid for an unsigned 8-bit int. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_U8_PTR ((uint8_t *)NULL) #define SYSCTL_U8(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_8, "CU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U8) && \ sizeof(uint8_t) == sizeof(*(ptr))) #define SYSCTL_ADD_U8(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ uint8_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U8); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_8, "CU", __DESCR(descr), NULL); \ }) /* Oid for a signed 16-bit int. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_S16_PTR ((int16_t *)NULL) #define SYSCTL_S16(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_S16 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_16, "S", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S16) && \ sizeof(int16_t) == sizeof(*(ptr))) #define SYSCTL_ADD_S16(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ int16_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S16); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_S16 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_16, "S", __DESCR(descr), NULL); \ }) /* Oid for an unsigned 16-bit int. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_U16_PTR ((uint16_t *)NULL) #define SYSCTL_U16(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U16 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_16, "SU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U16) && \ sizeof(uint16_t) == sizeof(*(ptr))) #define SYSCTL_ADD_U16(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ uint16_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U16); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U16 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_16, "SU", __DESCR(descr), NULL); \ }) /* Oid for a signed 32-bit int. If ptr is NULL, val is returned. 
*/ #define SYSCTL_NULL_S32_PTR ((int32_t *)NULL) #define SYSCTL_S32(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_S32 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_32, "I", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S32) && \ sizeof(int32_t) == sizeof(*(ptr))) #define SYSCTL_ADD_S32(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ int32_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S32); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_S32 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_32, "I", __DESCR(descr), NULL); \ }) /* Oid for an unsigned 32-bit int. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_U32_PTR ((uint32_t *)NULL) #define SYSCTL_U32(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U32 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_32, "IU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U32) && \ sizeof(uint32_t) == sizeof(*(ptr))) #define SYSCTL_ADD_U32(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ uint32_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U32); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U32 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_32, "IU", __DESCR(descr), NULL); \ }) /* Oid for a signed 64-bit int. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_S64_PTR ((int64_t *)NULL) #define SYSCTL_S64(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_64, "Q", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) && \ sizeof(int64_t) == sizeof(*(ptr))) #define SYSCTL_ADD_S64(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ int64_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_64, "Q", __DESCR(descr), NULL); \ }) /* Oid for an unsigned 64-bit int. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_U64_PTR ((uint64_t *)NULL) #define SYSCTL_U64(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_64, "QU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64) && \ sizeof(uint64_t) == sizeof(*(ptr))) #define SYSCTL_ADD_U64(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ uint64_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_64, "QU", __DESCR(descr), NULL); \ }) /* Oid for an int. If ptr is SYSCTL_NULL_INT_PTR, val is returned. 
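/*
 * Illustrative sketch, not part of this change: a read-only 64-bit
 * statistic exported with SYSCTL_U64() from above; the variable and oid
 * name are assumptions.  CTLFLAG_STATS marks it as a statistic rather
 * than a tuneable.
 */
static uint64_t example_rx_bytes;
SYSCTL_U64(_debug, OID_AUTO, example_rx_bytes,
    CTLFLAG_RD | CTLFLAG_STATS, &example_rx_bytes, 0,
    "Bytes received on the example path");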
*/ #define SYSCTL_NULL_INT_PTR ((int *)NULL) #define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_INT_WITH_LABEL(parent, nbr, name, access, ptr, val, descr, NULL) #define SYSCTL_INT_WITH_LABEL(parent, nbr, name, access, ptr, val, descr, label) \ SYSCTL_OID_WITH_LABEL(parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_int, "I", descr, label); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT) && \ sizeof(int) == sizeof(*(ptr))) #define SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ int *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_int, "I", __DESCR(descr), NULL); \ }) /* Oid for an unsigned int. If ptr is NULL, val is returned. */ #define SYSCTL_NULL_UINT_PTR ((unsigned *)NULL) #define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_int, "IU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_UINT) && \ sizeof(unsigned) == sizeof(*(ptr))) #define SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr) \ ({ \ unsigned *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_UINT); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \ __ptr, val, sysctl_handle_int, "IU", __DESCR(descr), NULL); \ }) /* Oid for a long. The pointer must be non NULL. */ #define SYSCTL_NULL_LONG_PTR ((long *)NULL) #define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_LONG | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_long, "L", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_LONG) && \ sizeof(long) == sizeof(*(ptr))) #define SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ long *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_LONG); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_LONG | CTLFLAG_MPSAFE | (access), \ __ptr, 0, sysctl_handle_long, "L", __DESCR(descr), NULL); \ }) /* Oid for an unsigned long. The pointer must be non NULL. */ #define SYSCTL_NULL_ULONG_PTR ((unsigned long *)NULL) #define SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_ULONG | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_long, "LU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_ULONG) && \ sizeof(unsigned long) == sizeof(*(ptr))) #define SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ unsigned long *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_ULONG); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_ULONG | CTLFLAG_MPSAFE | (access), \ __ptr, 0, sysctl_handle_long, "LU", __DESCR(descr), NULL); \ }) /* Oid for a quad. The pointer must be non NULL. 
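/*
 * Illustrative sketch, not part of this change: dynamic oids created
 * through a sysctl context (the SYSCTL_ADD_* macros above) are all torn
 * down again by a single sysctl_ctx_free(), which is the usual pattern
 * for loadable code.  The parent (_debug), names and variables here are
 * assumptions.
 */
static struct sysctl_ctx_list example_ctx;
static int example_counter;

static void
example_sysctl_attach(void)
{
	sysctl_ctx_init(&example_ctx);
	(void)SYSCTL_ADD_INT(&example_ctx, SYSCTL_STATIC_CHILDREN(_debug),
	    OID_AUTO, "example_counter", CTLFLAG_RD,
	    &example_counter, 0, "Example dynamically created oid");
}

static void
example_sysctl_detach(void)
{
	(void)sysctl_ctx_free(&example_ctx);	/* removes the oids added above */
}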
*/ #define SYSCTL_NULL_QUAD_PTR ((int64_t *)NULL) #define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_64, "Q", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) && \ sizeof(int64_t) == sizeof(*(ptr))) #define SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ int64_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \ __ptr, 0, sysctl_handle_64, "Q", __DESCR(descr), NULL); \ }) #define SYSCTL_NULL_UQUAD_PTR ((uint64_t *)NULL) #define SYSCTL_UQUAD(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ ptr, val, sysctl_handle_64, "QU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64) && \ sizeof(uint64_t) == sizeof(*(ptr))) #define SYSCTL_ADD_UQUAD(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ uint64_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ __ptr, 0, sysctl_handle_64, "QU", __DESCR(descr), NULL); \ }) /* Oid for a CPU dependent variable */ #define SYSCTL_ADD_UAUTO(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ struct sysctl_oid *__ret; \ CTASSERT((sizeof(uint64_t) == sizeof(*(ptr)) || \ sizeof(unsigned) == sizeof(*(ptr))) && \ ((access) & CTLTYPE) == 0); \ if (sizeof(uint64_t) == sizeof(*(ptr))) { \ __ret = sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ (ptr), 0, sysctl_handle_64, "QU", \ __DESCR(descr), NULL); \ } else { \ __ret = sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \ (ptr), 0, sysctl_handle_int, "IU", \ __DESCR(descr), NULL); \ } \ __ret; \ }) /* Oid for a 64-bit unsigned counter(9). The pointer must be non NULL. */ #define SYSCTL_COUNTER_U64(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ (ptr), 0, sysctl_handle_counter_u64, "QU", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64) && \ sizeof(counter_u64_t) == sizeof(*(ptr)) && \ sizeof(uint64_t) == sizeof(**(ptr))) #define SYSCTL_ADD_COUNTER_U64(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ counter_u64_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ __ptr, 0, sysctl_handle_counter_u64, "QU", __DESCR(descr), \ NULL); \ }) /* Oid for an array of counter(9)s. The pointer and length must be non zero. 
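/*
 * Illustrative sketch, not part of this change: exporting a counter(9)
 * with SYSCTL_COUNTER_U64() from above.  The counter itself would be
 * allocated with counter_u64_alloc(M_WAITOK) (see <sys/counter.h>) at
 * initialization time; the parent and names are assumptions.
 */
static counter_u64_t example_drops;
SYSCTL_COUNTER_U64(_debug, OID_AUTO, example_drops, CTLFLAG_RD,
    &example_drops, "Packets dropped on the example path");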
*/ #define SYSCTL_COUNTER_U64_ARRAY(parent, nbr, name, access, ptr, len, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access), \ (ptr), (len), sysctl_handle_counter_u64_array, "S", descr); \ CTASSERT((((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE) && \ sizeof(counter_u64_t) == sizeof(*(ptr)) && \ sizeof(uint64_t) == sizeof(**(ptr))) #define SYSCTL_ADD_COUNTER_U64_ARRAY(ctx, parent, nbr, name, access, \ ptr, len, descr) \ ({ \ counter_u64_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access), \ __ptr, len, sysctl_handle_counter_u64_array, "S", \ __DESCR(descr), NULL); \ }) /* Oid for an opaque object. Specified by a pointer and a length. */ #define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ ptr, len, sysctl_handle_opaque, fmt, descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE) #define SYSCTL_ADD_OPAQUE(ctx, parent, nbr, name, access, ptr, len, fmt, descr) \ ({ \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE); \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ ptr, len, sysctl_handle_opaque, fmt, __DESCR(descr), NULL); \ }) /* Oid for a struct. Specified by a pointer and a type. */ #define SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ ptr, sizeof(struct type), sysctl_handle_opaque, \ "S," #type, descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE) #define SYSCTL_ADD_STRUCT(ctx, parent, nbr, name, access, ptr, type, descr) \ ({ \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE); \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ (ptr), sizeof(struct type), \ sysctl_handle_opaque, "S," #type, __DESCR(descr), NULL); \ }) /* Oid for a procedure. Specified by a pointer and an arg. */ #define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ SYSCTL_OID(parent, nbr, name, (access), \ ptr, arg, handler, fmt, descr); \ CTASSERT(((access) & CTLTYPE) != 0) #define SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ ({ \ CTASSERT(((access) & CTLTYPE) != 0); \ sysctl_add_oid(ctx, parent, nbr, name, (access), \ (ptr), (arg), (handler), (fmt), __DESCR(descr), NULL); \ }) /* Oid to handle limits on uma(9) zone specified by pointer. */ #define SYSCTL_UMA_MAX(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \ (ptr), 0, sysctl_handle_uma_zone_max, "I", descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT) #define SYSCTL_ADD_UMA_MAX(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ uma_zone_t __ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \ __ptr, 0, sysctl_handle_uma_zone_max, "I", __DESCR(descr), \ NULL); \ }) /* Oid to obtain current use of uma(9) zone specified by pointer. 
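/*
 * Illustrative sketch, not part of this change: a SYSCTL_PROC() handler
 * that validates writes before accepting them, wrapping
 * sysctl_handle_int() in the usual way.  The parent, names and the
 * validation rule are assumptions for the example.
 */
static int example_limit = 16;

static int
sysctl_example_limit(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = example_limit;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);		/* read-only access, or copyin failed */
	if (val < 1 || val > 1024)
		return (EINVAL);	/* reject out-of-range writes */
	example_limit = val;
	return (0);
}
SYSCTL_PROC(_debug, OID_AUTO, example_limit,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_example_limit, "I", "Example limit with write validation");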
*/ #define SYSCTL_UMA_CUR(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ (ptr), 0, sysctl_handle_uma_zone_cur, "I", descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT) #define SYSCTL_ADD_UMA_CUR(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ uma_zone_t __ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ __ptr, 0, sysctl_handle_uma_zone_cur, "I", __DESCR(descr), \ NULL); \ }) /* OID expressing a sbintime_t as microseconds */ #define SYSCTL_SBINTIME_USEC(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ (ptr), 0, sysctl_usec_to_sbintime, "Q", descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) #define SYSCTL_ADD_SBINTIME_USEC(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ sbintime_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ __ptr, 0, sysctl_usec_to_sbintime, "Q", __DESCR(descr), \ NULL); \ }) /* OID expressing a sbintime_t as milliseconds */ #define SYSCTL_SBINTIME_MSEC(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ (ptr), 0, sysctl_msec_to_sbintime, "Q", descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) #define SYSCTL_ADD_SBINTIME_MSEC(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ sbintime_t *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ __ptr, 0, sysctl_msec_to_sbintime, "Q", __DESCR(descr), \ NULL); \ }) /* OID expressing a struct timeval as seconds */ #define SYSCTL_TIMEVAL_SEC(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ (ptr), 0, sysctl_sec_to_timeval, "I", descr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT) #define SYSCTL_ADD_TIMEVAL_SEC(ctx, parent, nbr, name, access, ptr, descr) \ ({ \ struct timeval *__ptr = (ptr); \ CTASSERT(((access) & CTLTYPE) == 0 || \ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ __ptr, 0, sysctl_sec_to_timeval, "I", __DESCR(descr), \ NULL); \ }) /* * A macro to generate a read-only sysctl to indicate the presence of optional * kernel features. 
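/*
 * Illustrative sketch, not part of this change: exposing an sbintime_t
 * interval in milliseconds with SYSCTL_SBINTIME_MSEC() from above.  The
 * variable and names are assumptions; SBT_1S comes from <sys/time.h>.
 */
static sbintime_t example_interval = SBT_1S;
SYSCTL_SBINTIME_MSEC(_debug, OID_AUTO, example_interval_ms, 0,
    &example_interval, "Example interval, reported in milliseconds");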
*/ #define FEATURE(name, desc) \ SYSCTL_INT_WITH_LABEL(_kern_features, OID_AUTO, name, \ CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1, desc, "feature") #endif /* _KERNEL */ /* * Top-level identifiers */ -#define CTL_UNSPEC 0 /* unused */ +#define CTL_SYSCTL 0 /* "magic" numbers */ #define CTL_KERN 1 /* "high kernel": proc, limits */ #define CTL_VM 2 /* virtual memory */ #define CTL_VFS 3 /* filesystem, mount type is next */ #define CTL_NET 4 /* network, see socket.h */ #define CTL_DEBUG 5 /* debugging parameters */ #define CTL_HW 6 /* generic cpu/io */ #define CTL_MACHDEP 7 /* machine dependent */ #define CTL_USER 8 /* user-level */ #define CTL_P1003_1B 9 /* POSIX 1003.1B */ + +/* + * CTL_SYSCTL identifiers + */ +#define CTL_SYSCTL_DEBUG 0 /* printf all nodes */ +#define CTL_SYSCTL_NAME 1 /* string name of OID */ +#define CTL_SYSCTL_NEXT 2 /* next OID */ +#define CTL_SYSCTL_NAME2OID 3 /* int array of name */ +#define CTL_SYSCTL_OIDFMT 4 /* OID's kind and format */ +#define CTL_SYSCTL_OIDDESCR 5 /* OID's description */ +#define CTL_SYSCTL_OIDLABEL 6 /* aggregation label */ /* * CTL_KERN identifiers */ #define KERN_OSTYPE 1 /* string: system version */ #define KERN_OSRELEASE 2 /* string: system release */ #define KERN_OSREV 3 /* int: system revision */ #define KERN_VERSION 4 /* string: compile time info */ #define KERN_MAXVNODES 5 /* int: max vnodes */ #define KERN_MAXPROC 6 /* int: max processes */ #define KERN_MAXFILES 7 /* int: max open files */ #define KERN_ARGMAX 8 /* int: max arguments to exec */ #define KERN_SECURELVL 9 /* int: system security level */ #define KERN_HOSTNAME 10 /* string: hostname */ #define KERN_HOSTID 11 /* int: host identifier */ #define KERN_CLOCKRATE 12 /* struct: struct clockrate */ #define KERN_VNODE 13 /* struct: vnode structures */ #define KERN_PROC 14 /* struct: process entries */ #define KERN_FILE 15 /* struct: file entries */ #define KERN_PROF 16 /* node: kernel profiling info */ #define KERN_POSIX1 17 /* int: POSIX.1 version */ #define KERN_NGROUPS 18 /* int: # of supplemental group ids */ #define KERN_JOB_CONTROL 19 /* int: is job control available */ #define KERN_SAVED_IDS 20 /* int: saved set-user/group-ID */ #define KERN_BOOTTIME 21 /* struct: time kernel was booted */ #define KERN_NISDOMAINNAME 22 /* string: YP domain name */ #define KERN_UPDATEINTERVAL 23 /* int: update process sleep time */ #define KERN_OSRELDATE 24 /* int: kernel release date */ #define KERN_NTP_PLL 25 /* node: NTP PLL control */ #define KERN_BOOTFILE 26 /* string: name of booted kernel */ #define KERN_MAXFILESPERPROC 27 /* int: max open files per proc */ #define KERN_MAXPROCPERUID 28 /* int: max processes per uid */ #define KERN_DUMPDEV 29 /* struct cdev *: device to dump on */ #define KERN_IPC 30 /* node: anything related to IPC */ #define KERN_DUMMY 31 /* unused */ #define KERN_PS_STRINGS 32 /* int: address of PS_STRINGS */ #define KERN_USRSTACK 33 /* int: address of USRSTACK */ #define KERN_LOGSIGEXIT 34 /* int: do we log sigexit procs? 
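/*
 * Userland sketch, not part of this change: the new CTL_SYSCTL_* names
 * label the "magic" meta-oids that previously had to be written as bare
 * numbers.  For example, translating a name to its numeric OID (what
 * sysctlnametomib(3) does internally) goes through CTL_SYSCTL_NAME2OID:
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	int qoid[2] = { CTL_SYSCTL, CTL_SYSCTL_NAME2OID };
	int oid[CTL_MAXNAME];
	size_t len = sizeof(oid);
	const char *name = "kern.ostype";
	u_int i, n;

	if (sysctl(qoid, 2, oid, &len, name, strlen(name)) == -1)
		return (1);
	n = len / sizeof(int);
	for (i = 0; i < n; i++)
		printf("%d%s", oid[i], i + 1 < n ? "." : "\n");
	return (0);
}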
*/ #define KERN_IOV_MAX 35 /* int: value of UIO_MAXIOV */ #define KERN_HOSTUUID 36 /* string: host UUID identifier */ #define KERN_ARND 37 /* int: from arc4rand() */ #define KERN_MAXPHYS 38 /* int: MAXPHYS value */ /* * KERN_PROC subtypes */ #define KERN_PROC_ALL 0 /* everything */ #define KERN_PROC_PID 1 /* by process id */ #define KERN_PROC_PGRP 2 /* by process group id */ #define KERN_PROC_SESSION 3 /* by session of pid */ #define KERN_PROC_TTY 4 /* by controlling tty */ #define KERN_PROC_UID 5 /* by effective uid */ #define KERN_PROC_RUID 6 /* by real uid */ #define KERN_PROC_ARGS 7 /* get/set arguments/proctitle */ #define KERN_PROC_PROC 8 /* only return procs */ #define KERN_PROC_SV_NAME 9 /* get syscall vector name */ #define KERN_PROC_RGID 10 /* by real group id */ #define KERN_PROC_GID 11 /* by effective group id */ #define KERN_PROC_PATHNAME 12 /* path to executable */ #define KERN_PROC_OVMMAP 13 /* Old VM map entries for process */ #define KERN_PROC_OFILEDESC 14 /* Old file descriptors for process */ #define KERN_PROC_KSTACK 15 /* Kernel stacks for process */ #define KERN_PROC_INC_THREAD 0x10 /* * modifier for pid, pgrp, tty, * uid, ruid, gid, rgid and proc * This effectively uses 16-31 */ #define KERN_PROC_VMMAP 32 /* VM map entries for process */ #define KERN_PROC_FILEDESC 33 /* File descriptors for process */ #define KERN_PROC_GROUPS 34 /* process groups */ #define KERN_PROC_ENV 35 /* get environment */ #define KERN_PROC_AUXV 36 /* get ELF auxiliary vector */ #define KERN_PROC_RLIMIT 37 /* process resource limits */ #define KERN_PROC_PS_STRINGS 38 /* get ps_strings location */ #define KERN_PROC_UMASK 39 /* process umask */ #define KERN_PROC_OSREL 40 /* osreldate for process binary */ #define KERN_PROC_SIGTRAMP 41 /* signal trampoline location */ #define KERN_PROC_CWD 42 /* process current working directory */ #define KERN_PROC_NFDS 43 /* number of open file descriptors */ /* * KERN_IPC identifiers */ #define KIPC_MAXSOCKBUF 1 /* int: max size of a socket buffer */ #define KIPC_SOCKBUF_WASTE 2 /* int: wastage factor in sockbuf */ #define KIPC_SOMAXCONN 3 /* int: max length of connection q */ #define KIPC_MAX_LINKHDR 4 /* int: max length of link header */ #define KIPC_MAX_PROTOHDR 5 /* int: max length of network header */ #define KIPC_MAX_HDR 6 /* int: max total length of headers */ #define KIPC_MAX_DATALEN 7 /* int: max length of data? */ /* * CTL_HW identifiers */ #define HW_MACHINE 1 /* string: machine class */ #define HW_MODEL 2 /* string: specific machine model */ #define HW_NCPU 3 /* int: number of cpus */ #define HW_BYTEORDER 4 /* int: machine byte order */ #define HW_PHYSMEM 5 /* int: total memory */ #define HW_USERMEM 6 /* int: non-kernel memory */ #define HW_PAGESIZE 7 /* int: software page size */ #define HW_DISKNAMES 8 /* strings: disk drive names */ #define HW_DISKSTATS 9 /* struct: diskstats[] */ #define HW_FLOATINGPT 10 /* int: has HW floating point? 
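/*
 * Userland sketch, not part of this change: fetching the kinfo_proc
 * record for the current process through the KERN_PROC_PID subtype
 * listed above.
 */
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, (int)getpid() };
	struct kinfo_proc kp;
	size_t len = sizeof(kp);

	if (sysctl(mib, 4, &kp, &len, NULL, 0) == -1)
		return (1);
	printf("pid %d comm %s\n", (int)kp.ki_pid, kp.ki_comm);
	return (0);
}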
*/ #define HW_MACHINE_ARCH 11 /* string: machine architecture */ #define HW_REALMEM 12 /* int: 'real' memory */ /* * CTL_USER definitions */ #define USER_CS_PATH 1 /* string: _CS_PATH */ #define USER_BC_BASE_MAX 2 /* int: BC_BASE_MAX */ #define USER_BC_DIM_MAX 3 /* int: BC_DIM_MAX */ #define USER_BC_SCALE_MAX 4 /* int: BC_SCALE_MAX */ #define USER_BC_STRING_MAX 5 /* int: BC_STRING_MAX */ #define USER_COLL_WEIGHTS_MAX 6 /* int: COLL_WEIGHTS_MAX */ #define USER_EXPR_NEST_MAX 7 /* int: EXPR_NEST_MAX */ #define USER_LINE_MAX 8 /* int: LINE_MAX */ #define USER_RE_DUP_MAX 9 /* int: RE_DUP_MAX */ #define USER_POSIX2_VERSION 10 /* int: POSIX2_VERSION */ #define USER_POSIX2_C_BIND 11 /* int: POSIX2_C_BIND */ #define USER_POSIX2_C_DEV 12 /* int: POSIX2_C_DEV */ #define USER_POSIX2_CHAR_TERM 13 /* int: POSIX2_CHAR_TERM */ #define USER_POSIX2_FORT_DEV 14 /* int: POSIX2_FORT_DEV */ #define USER_POSIX2_FORT_RUN 15 /* int: POSIX2_FORT_RUN */ #define USER_POSIX2_LOCALEDEF 16 /* int: POSIX2_LOCALEDEF */ #define USER_POSIX2_SW_DEV 17 /* int: POSIX2_SW_DEV */ #define USER_POSIX2_UPE 18 /* int: POSIX2_UPE */ #define USER_STREAM_MAX 19 /* int: POSIX2_STREAM_MAX */ #define USER_TZNAME_MAX 20 /* int: POSIX2_TZNAME_MAX */ #define CTL_P1003_1B_ASYNCHRONOUS_IO 1 /* boolean */ #define CTL_P1003_1B_MAPPED_FILES 2 /* boolean */ #define CTL_P1003_1B_MEMLOCK 3 /* boolean */ #define CTL_P1003_1B_MEMLOCK_RANGE 4 /* boolean */ #define CTL_P1003_1B_MEMORY_PROTECTION 5 /* boolean */ #define CTL_P1003_1B_MESSAGE_PASSING 6 /* boolean */ #define CTL_P1003_1B_PRIORITIZED_IO 7 /* boolean */ #define CTL_P1003_1B_PRIORITY_SCHEDULING 8 /* boolean */ #define CTL_P1003_1B_REALTIME_SIGNALS 9 /* boolean */ #define CTL_P1003_1B_SEMAPHORES 10 /* boolean */ #define CTL_P1003_1B_FSYNC 11 /* boolean */ #define CTL_P1003_1B_SHARED_MEMORY_OBJECTS 12 /* boolean */ #define CTL_P1003_1B_SYNCHRONIZED_IO 13 /* boolean */ #define CTL_P1003_1B_TIMERS 14 /* boolean */ #define CTL_P1003_1B_AIO_LISTIO_MAX 15 /* int */ #define CTL_P1003_1B_AIO_MAX 16 /* int */ #define CTL_P1003_1B_AIO_PRIO_DELTA_MAX 17 /* int */ #define CTL_P1003_1B_DELAYTIMER_MAX 18 /* int */ #define CTL_P1003_1B_MQ_OPEN_MAX 19 /* int */ #define CTL_P1003_1B_PAGESIZE 20 /* int */ #define CTL_P1003_1B_RTSIG_MAX 21 /* int */ #define CTL_P1003_1B_SEM_NSEMS_MAX 22 /* int */ #define CTL_P1003_1B_SEM_VALUE_MAX 23 /* int */ #define CTL_P1003_1B_SIGQUEUE_MAX 24 /* int */ #define CTL_P1003_1B_TIMER_MAX 25 /* int */ #define CTL_P1003_1B_MAXID 26 #ifdef _KERNEL /* * Declare some common oids. 
*/ extern struct sysctl_oid_list sysctl__children; SYSCTL_DECL(_kern); SYSCTL_DECL(_kern_features); SYSCTL_DECL(_kern_ipc); SYSCTL_DECL(_kern_proc); SYSCTL_DECL(_kern_sched); SYSCTL_DECL(_kern_sched_stats); SYSCTL_DECL(_sysctl); SYSCTL_DECL(_vm); SYSCTL_DECL(_vm_stats); SYSCTL_DECL(_vm_stats_misc); SYSCTL_DECL(_vfs); SYSCTL_DECL(_net); SYSCTL_DECL(_debug); SYSCTL_DECL(_debug_sizeof); SYSCTL_DECL(_dev); SYSCTL_DECL(_hw); SYSCTL_DECL(_hw_bus); SYSCTL_DECL(_hw_bus_devices); SYSCTL_DECL(_hw_bus_info); SYSCTL_DECL(_machdep); SYSCTL_DECL(_user); SYSCTL_DECL(_compat); SYSCTL_DECL(_regression); SYSCTL_DECL(_security); SYSCTL_DECL(_security_bsd); extern char machine[]; extern char osrelease[]; extern char ostype[]; extern char kern_ident[]; /* Dynamic oid handling */ struct sysctl_oid *sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int nbr, const char *name, int kind, void *arg1, intmax_t arg2, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr, const char *label); int sysctl_remove_name(struct sysctl_oid *parent, const char *name, int del, int recurse); void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name); int sysctl_move_oid(struct sysctl_oid *oidp, struct sysctl_oid_list *parent); int sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse); int sysctl_ctx_init(struct sysctl_ctx_list *clist); int sysctl_ctx_free(struct sysctl_ctx_list *clist); struct sysctl_ctx_entry *sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); struct sysctl_ctx_entry *sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); int sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); int kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags); int kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags); int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, const void *new, size_t newlen, size_t *retval, int flags); int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req); void sysctl_wlock(void); void sysctl_wunlock(void); int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len); int kern___sysctlbyname(struct thread *td, const char *name, size_t namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags, bool inkernel); struct sbuf; struct sbuf *sbuf_new_for_sysctl(struct sbuf *, char *, int, struct sysctl_req *); #else /* !_KERNEL */ #include __BEGIN_DECLS int sysctl(const int *, u_int, void *, size_t *, const void *, size_t); int sysctlbyname(const char *, void *, size_t *, const void *, size_t); int sysctlnametomib(const char *, int *, size_t *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_SYSCTL_H_ */ Index: projects/clang900-import/sys/vm/vm_glue.c =================================================================== --- projects/clang900-import/sys/vm/vm_glue.c (revision 352536) +++ projects/clang900-import/sys/vm/vm_glue.c (revision 352537) @@ -1,602 +1,602 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_glue.c 8.6 (Berkeley) 1/5/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include "opt_kstack_pages.h" #include "opt_kstack_max_pages.h" #include "opt_kstack_usage_prof.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * MPSAFE * * WARNING! This code calls vm_map_check_protection() which only checks * the associated vm_map_entry range. It does not determine whether the * contents of the memory is actually readable or writable. In most cases * just checking the vm_map_entry is sufficient within the kernel's address * space. 
*/ int kernacc(void *addr, int len, int rw) { boolean_t rv; vm_offset_t saddr, eaddr; vm_prot_t prot; KASSERT((rw & ~VM_PROT_ALL) == 0, ("illegal ``rw'' argument to kernacc (%x)\n", rw)); if ((vm_offset_t)addr + len > vm_map_max(kernel_map) || (vm_offset_t)addr + len < (vm_offset_t)addr) return (FALSE); prot = rw; saddr = trunc_page((vm_offset_t)addr); eaddr = round_page((vm_offset_t)addr + len); vm_map_lock_read(kernel_map); rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); vm_map_unlock_read(kernel_map); return (rv == TRUE); } /* * MPSAFE * * WARNING! This code calls vm_map_check_protection() which only checks * the associated vm_map_entry range. It does not determine whether the * contents of the memory is actually readable or writable. vmapbuf(), * vm_fault_quick(), or copyin()/copout()/su*()/fu*() functions should be * used in conjunction with this call. */ int useracc(void *addr, int len, int rw) { boolean_t rv; vm_prot_t prot; vm_map_t map; KASSERT((rw & ~VM_PROT_ALL) == 0, ("illegal ``rw'' argument to useracc (%x)\n", rw)); prot = rw; map = &curproc->p_vmspace->vm_map; if ((vm_offset_t)addr + len > vm_map_max(map) || (vm_offset_t)addr + len < (vm_offset_t)addr) { return (FALSE); } vm_map_lock_read(map); rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot); vm_map_unlock_read(map); return (rv == TRUE); } int vslock(void *addr, size_t len) { vm_offset_t end, last, start; vm_size_t npages; int error; last = (vm_offset_t)addr + len; start = trunc_page((vm_offset_t)addr); end = round_page(last); if (last < (vm_offset_t)addr || end < (vm_offset_t)addr) return (EINVAL); npages = atop(end - start); if (npages > vm_page_max_user_wired) return (ENOMEM); error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end, VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (error == KERN_SUCCESS) { curthread->td_vslock_sz += len; return (0); } /* * Return EFAULT on error to match copy{in,out}() behaviour * rather than returning ENOMEM like mlock() would. */ return (EFAULT); } void vsunlock(void *addr, size_t len) { /* Rely on the parameter sanity checks performed by vslock(). */ MPASS(curthread->td_vslock_sz >= len); curthread->td_vslock_sz -= len; (void)vm_map_unwire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); } /* * Pin the page contained within the given object at the given offset. If the * page is not resident, allocate and load it using the given object's pager. * Return the pinned page if successful; otherwise, return NULL. */ static vm_page_t vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) { vm_page_t m; vm_pindex_t pindex; pindex = OFF_TO_IDX(offset); VM_OBJECT_WLOCK(object); (void)vm_page_grab_valid(&m, object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); VM_OBJECT_WUNLOCK(object); return (m); } /* * Return a CPU private mapping to the page at the given offset within the * given object. The page is pinned before it is mapped. */ struct sf_buf * vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset) { vm_page_t m; m = vm_imgact_hold_page(object, offset); if (m == NULL) return (NULL); sched_pin(); return (sf_buf_alloc(m, SFB_CPUPRIVATE)); } /* * Destroy the given CPU private mapping and unpin the page that it mapped. 
*/ void vm_imgact_unmap_page(struct sf_buf *sf) { vm_page_t m; m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); vm_page_unwire(m, PQ_ACTIVE); } void vm_sync_icache(vm_map_t map, vm_offset_t va, vm_offset_t sz) { pmap_sync_icache(map->pmap, va, sz); } static uma_zone_t kstack_cache; static int kstack_cache_size = 128; static int kstack_domain_iter; static int sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS) { int error, newsize; newsize = kstack_cache_size; error = sysctl_handle_int(oidp, &newsize, 0, req); if (error == 0 && req->newptr && newsize != kstack_cache_size) kstack_cache_size = uma_zone_set_maxcache(kstack_cache, newsize); return (error); } SYSCTL_PROC(_vm, OID_AUTO, kstack_cache_size, CTLTYPE_INT|CTLFLAG_RW, &kstack_cache_size, 0, sysctl_kstack_cache_size, "IU", "Maximum number of cached kernel stacks"); /* * Create the kernel stack (including pcb for i386) for a new thread. * This routine directly affects the fork perf for a process and * create performance for a thread. */ static vm_offset_t vm_thread_stack_create(struct domainset *ds, vm_object_t *ksobjp, int pages) { vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_offset_t ks; int i; /* * Allocate an object for the kstack. */ ksobj = vm_object_allocate(OBJT_DEFAULT, pages); /* * Get a kernel virtual address for this thread's kstack. */ #if defined(__mips__) /* * We need to align the kstack's mapped address to fit within * a single TLB entry. */ if (vmem_xalloc(kernel_arena, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE, PAGE_SIZE * 2, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, M_BESTFIT | M_NOWAIT, &ks)) { ks = 0; } #else ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); #endif if (ks == 0) { - printf("vm_thread_new: kstack allocation failed\n"); + printf("%s: kstack allocation failed\n", __func__); vm_object_deallocate(ksobj); return (0); } if (vm_ndomains > 1) { ksobj->domain.dr_policy = ds; ksobj->domain.dr_iter = atomic_fetchadd_int(&kstack_domain_iter, 1); } if (KSTACK_GUARD_PAGES != 0) { pmap_qremove(ks, KSTACK_GUARD_PAGES); ks += KSTACK_GUARD_PAGES * PAGE_SIZE; } /* * For the length of the stack, link in a real page of ram for each * page of stack. */ VM_OBJECT_WLOCK(ksobj); (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED, ma, pages); for (i = 0; i < pages; i++) ma[i]->valid = VM_PAGE_BITS_ALL; VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(ks, ma, pages); *ksobjp = ksobj; return (ks); } static void vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages) { vm_page_t m; int i; pmap_qremove(ks, pages); VM_OBJECT_WLOCK(ksobj); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) - panic("vm_thread_dispose: kstack already missing?"); + panic("%s: kstack already missing?", __func__); vm_page_unwire_noq(m); vm_page_free(m); } VM_OBJECT_WUNLOCK(ksobj); vm_object_deallocate(ksobj); kva_free(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE), (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); } /* * Allocate the kernel stack for a new thread. */ int vm_thread_new(struct thread *td, int pages) { vm_object_t ksobj; vm_offset_t ks; /* Bounds check */ if (pages <= 1) pages = kstack_pages; else if (pages > KSTACK_MAX_PAGES) pages = KSTACK_MAX_PAGES; ks = 0; ksobj = NULL; if (pages == kstack_pages && kstack_cache != NULL) { ks = (vm_offset_t)uma_zalloc(kstack_cache, M_NOWAIT); if (ks != 0) ksobj = PHYS_TO_VM_PAGE(pmap_kextract(ks))->object; } /* * Ensure that kstack objects can draw pages from any memory * domain. Otherwise a local memory shortage can block a process * swap-in. 
*/ if (ks == 0) ks = vm_thread_stack_create(DOMAINSET_PREF(PCPU_GET(domain)), &ksobj, pages); if (ks == 0) return (0); td->td_kstack_obj = ksobj; td->td_kstack = ks; td->td_kstack_pages = pages; return (1); } /* * Dispose of a thread's kernel stack. */ void vm_thread_dispose(struct thread *td) { vm_object_t ksobj; vm_offset_t ks; int pages; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; td->td_kstack = 0; td->td_kstack_pages = 0; if (pages == kstack_pages) uma_zfree(kstack_cache, (void *)ks); else vm_thread_stack_dispose(ksobj, ks, pages); } static int kstack_import(void *arg, void **store, int cnt, int domain, int flags) { struct domainset *ds; vm_object_t ksobj; int i; if (domain == UMA_ANYDOMAIN) ds = DOMAINSET_RR(); else ds = DOMAINSET_PREF(domain); for (i = 0; i < cnt; i++) { store[i] = (void *)vm_thread_stack_create(ds, &ksobj, kstack_pages); if (store[i] == NULL) break; } return (i); } static void kstack_release(void *arg, void **store, int cnt) { vm_offset_t ks; int i; for (i = 0; i < cnt; i++) { ks = (vm_offset_t)store[i]; vm_thread_stack_dispose( PHYS_TO_VM_PAGE(pmap_kextract(ks))->object, ks, kstack_pages); } } static void kstack_cache_init(void *null) { kstack_cache = uma_zcache_create("kstack_cache", kstack_pages * PAGE_SIZE, NULL, NULL, NULL, NULL, kstack_import, kstack_release, NULL, UMA_ZONE_NUMA|UMA_ZONE_MINBUCKET); uma_zone_set_maxcache(kstack_cache, kstack_cache_size); } SYSINIT(vm_kstacks, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, kstack_cache_init, NULL); #ifdef KSTACK_USAGE_PROF /* * Track maximum stack used by a thread in kernel. */ static int max_kstack_used; SYSCTL_INT(_debug, OID_AUTO, max_kstack_used, CTLFLAG_RD, &max_kstack_used, 0, "Maxiumum stack depth used by a thread in kernel"); void intr_prof_stack_use(struct thread *td, struct trapframe *frame) { vm_offset_t stack_top; vm_offset_t current; int used, prev_used; /* * Testing for interrupted kernel mode isn't strictly * needed. It optimizes the execution, since interrupts from * usermode will have only the trap frame on the stack. */ if (TRAPF_USERMODE(frame)) return; stack_top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE; current = (vm_offset_t)(uintptr_t)&stack_top; /* * Try to detect if interrupt is using kernel thread stack. * Hardware could use a dedicated stack for interrupt handling. */ if (stack_top <= current || current < td->td_kstack) return; used = stack_top - current; for (;;) { prev_used = max_kstack_used; if (prev_used >= used) break; if (atomic_cmpset_int(&max_kstack_used, prev_used, used)) break; } } #endif /* KSTACK_USAGE_PROF */ /* * Implement fork's actions on an address space. * Here we arrange for the address space to be copied or referenced, * allocate a user struct (pcb and kernel stack), then call the * machine-dependent layer to fill those in and make the new process * ready to run. The new process is set up so that it returns directly * to user mode to avoid stack copying and relocation problems. */ int vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2, struct vmspace *vm2, int flags) { struct proc *p1 = td->td_proc; struct domainset *dset; int error; if ((flags & RFPROC) == 0) { /* * Divorce the memory, if it is shared, essentially * this changes shared memory amongst threads, into * COW locally. 
*/ if ((flags & RFMEM) == 0) { if (p1->p_vmspace->vm_refcnt > 1) { error = vmspace_unshare(p1); if (error) return (error); } } cpu_fork(td, p2, td2, flags); return (0); } if (flags & RFMEM) { p2->p_vmspace = p1->p_vmspace; atomic_add_int(&p1->p_vmspace->vm_refcnt, 1); } dset = td2->td_domain.dr_policy; while (vm_page_count_severe_set(&dset->ds_mask)) { vm_wait_doms(&dset->ds_mask); } if ((flags & RFMEM) == 0) { p2->p_vmspace = vm2; if (p1->p_vmspace->vm_shm) shmfork(p1, p2); } /* * cpu_fork will copy and update the pcb, set up the kernel stack, * and make the child ready to run. */ cpu_fork(td, p2, td2, flags); return (0); } /* * Called after process has been wait(2)'ed upon and is being reaped. * The idea is to reclaim resources that we could not reclaim while * the process was still executing. */ void vm_waitproc(p) struct proc *p; { vmspace_exitfree(p); /* and clean-out the vmspace */ } void kick_proc0(void) { wakeup(&proc0); } Index: projects/clang900-import/tests/sys/netpfil/common/forward.sh =================================================================== --- projects/clang900-import/tests/sys/netpfil/common/forward.sh (revision 352536) +++ projects/clang900-import/tests/sys/netpfil/common/forward.sh (revision 352537) @@ -1,101 +1,105 @@ #- # SPDX-License-Identifier: BSD-2-Clause-FreeBSD # # Copyright (c) 2019 Ahsan Barkati # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # . $(atf_get_srcdir)/utils.subr . 
$(atf_get_srcdir)/runner.subr v4_head() { atf_set descr 'Basic forwarding test' atf_set require.user root atf_set require.progs scapy } v4_body() { firewall=$1 + if [ "$(atf_config_get ci false)" = "true" ] && \ + [ "$(uname -p)" = "i386" ] && [ "${firewall}" = "pf" ]; then + atf_skip "https://bugs.freebsd.org/240085" + fi firewall_init $firewall epair_send=$(vnet_mkepair) ifconfig ${epair_send}a 192.0.2.1/24 up epair_recv=$(vnet_mkepair) ifconfig ${epair_recv}a up vnet_mkjail iron ${epair_send}b ${epair_recv}b jexec iron ifconfig ${epair_send}b 192.0.2.2/24 up jexec iron ifconfig ${epair_recv}b 198.51.100.2/24 up jexec iron sysctl net.inet.ip.forwarding=1 jexec iron arp -s 198.51.100.3 00:01:02:03:04:05 route add -net 198.51.100.0/24 192.0.2.2 atf_check -s exit:0 $(atf_get_srcdir)/pft_ping.py \ --sendif ${epair_send}a \ --to 198.51.100.3 \ --recvif ${epair_recv}a firewall_config "iron" ${firewall} \ "pf" \ "block in" \ "ipfw" \ "ipfw -q add 100 deny all from any to any in" \ "ipf" \ "block in all" \ atf_check -s exit:1 $(atf_get_srcdir)/pft_ping.py \ --sendif ${epair_send}a \ --to 198.51.100.3 \ --recvif ${epair_recv}a firewall_config "iron" ${firewall} \ "pf" \ "block out" \ "ipfw" \ "ipfw -q add 100 deny all from any to any out" \ "ipf" \ "block out all" \ atf_check -s exit:1 $(atf_get_srcdir)/pft_ping.py \ --sendif ${epair_send}a \ --to 198.51.100.3 \ --recv ${epair_recv}a } v4_cleanup() { firewall=$1 firewall_cleanup $firewall } setup_tests \ v4 \ pf \ ipfw \ ipf Index: projects/clang900-import/tests/sys/netpfil/common/tos.sh =================================================================== --- projects/clang900-import/tests/sys/netpfil/common/tos.sh (revision 352536) +++ projects/clang900-import/tests/sys/netpfil/common/tos.sh (revision 352537) @@ -1,118 +1,122 @@ #- # SPDX-License-Identifier: BSD-2-Clause-FreeBSD # # Copyright (c) 2019 Ahsan Barkati # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # . $(atf_get_srcdir)/utils.subr . 
$(atf_get_srcdir)/runner.subr tos_head() { atf_set descr 'set-tos test' atf_set require.user root atf_set require.progs scapy } tos_body() { firewall=$1 + if [ "$(atf_config_get ci false)" = "true" ] && \ + [ "$(uname -p)" = "i386" ] && [ "${firewall}" = "pf" ]; then + atf_skip "https://bugs.freebsd.org/240086" + fi firewall_init $firewall epair_send=$(vnet_mkepair) ifconfig ${epair_send}a 192.0.2.1/24 up epair_recv=$(vnet_mkepair) ifconfig ${epair_recv}a up vnet_mkjail iron ${epair_send}b ${epair_recv}b jexec iron ifconfig ${epair_send}b 192.0.2.2/24 up jexec iron ifconfig ${epair_recv}b 198.51.100.2/24 up jexec iron sysctl net.inet.ip.forwarding=1 jexec iron arp -s 198.51.100.3 00:01:02:03:04:05 route add -net 198.51.100.0/24 192.0.2.2 # Check if the firewall is able to set the ToS bits firewall_config "iron" ${firewall} \ "pf" \ "scrub out proto icmp set-tos 36" \ "ipfw" \ "ipfw -q add 100 setdscp 9 ip from any to any" # dscp is set to 9 because last two bits are for # EN and hence tos would be 36 atf_check -s exit:0 $(atf_get_srcdir)/pft_ping.py \ --sendif ${epair_send}a \ --to 198.51.100.3 \ --recvif ${epair_recv}a \ --expect-tos 36 # Check if the firewall is able to set the ToS bits # and persists the EN bits (if already set) firewall_config "iron" ${firewall} \ "pf" \ "scrub out proto icmp set-tos 36" \ "ipfw" \ "ipfw -q add 100 setdscp 9 ip from any to any" atf_check -s exit:0 $(atf_get_srcdir)/pft_ping.py \ --sendif ${epair_send}a \ --to 198.51.100.3 \ --recvif ${epair_recv}a \ --send-tos 3 \ --expect-tos 39 # Check if the firewall is able to filter the # packets based on the ToS value firewall_config "iron" ${firewall} \ "pf" \ "block all tos 36" \ "ipfw" \ "ipfw -q add 100 deny all from any to any dscp 9" atf_check -s exit:1 $(atf_get_srcdir)/pft_ping.py \ --sendif ${epair_send}a \ --to 198.51.100.3 \ --recvif ${epair_recv}a \ --send-tos 36 atf_check -s exit:0 $(atf_get_srcdir)/pft_ping.py \ --sendif ${epair_send}a \ --to 198.51.100.3 \ --recvif ${epair_recv}a \ --send-tos 32 } tos_cleanup() { firewall=$1 firewall_cleanup $firewall } setup_tests \ "tos" \ "pf" \ "ipfw" Index: projects/clang900-import/tests/sys/vm/Makefile =================================================================== --- projects/clang900-import/tests/sys/vm/Makefile (revision 352536) +++ projects/clang900-import/tests/sys/vm/Makefile (revision 352537) @@ -1,10 +1,11 @@ # $FreeBSD$ PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/vm ATF_TESTS_C+= mlock_test \ - mmap_test + mmap_test \ + page_fault_signal .include Index: projects/clang900-import/tests/sys/vm/page_fault_signal.c =================================================================== --- projects/clang900-import/tests/sys/vm/page_fault_signal.c (nonexistent) +++ projects/clang900-import/tests/sys/vm/page_fault_signal.c (revision 352537) @@ -0,0 +1,184 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2019 Jilles Tjoelker + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include + +#include +#include +#include +#include +#include + +static sigjmp_buf sig_env; +static volatile int last_sig, last_code; + +static void +sighandler(int sig, siginfo_t *info, void *context __unused) +{ + + last_sig = sig; + last_code = info->si_code; + siglongjmp(sig_env, 1); +} + +static void +setup_signals(void) +{ + struct sigaction sa; + int r; + + sa.sa_sigaction = sighandler; + sa.sa_flags = SA_RESTART | SA_RESETHAND | SA_SIGINFO; + r = sigfillset(&sa.sa_mask); + ATF_REQUIRE(r != -1); + r = sigaction(SIGILL, &sa, NULL); + ATF_REQUIRE(r != -1); + r = sigaction(SIGBUS, &sa, NULL); + ATF_REQUIRE(r != -1); + r = sigaction(SIGSEGV, &sa, NULL); + ATF_REQUIRE(r != -1); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__segv_maperr_1); +ATF_TC_BODY(page_fault_signal__segv_maperr_1, tc) +{ + int *p; + int r; + int sz; + + sz = getpagesize(); + p = mmap(NULL, sz, PROT_READ, MAP_ANON, -1, 0); + ATF_REQUIRE(p != MAP_FAILED); + r = munmap(p, sz); + ATF_REQUIRE(r != -1); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + *(volatile int *)p = 1; + } + ATF_CHECK_EQ(SIGSEGV, last_sig); + ATF_CHECK_EQ(SEGV_MAPERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__segv_accerr_1); +ATF_TC_BODY(page_fault_signal__segv_accerr_1, tc) +{ + int *p; + int sz; + + sz = getpagesize(); + p = mmap(NULL, sz, PROT_READ, MAP_ANON, -1, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + *(volatile int *)p = 1; + } + (void)munmap(p, sz); + ATF_CHECK_EQ(SIGSEGV, last_sig); + ATF_CHECK_EQ(SEGV_ACCERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__segv_accerr_2); +ATF_TC_BODY(page_fault_signal__segv_accerr_2, tc) +{ + int *p; + volatile int dummy; + int sz; + + sz = getpagesize(); + p = mmap(NULL, sz, PROT_NONE, MAP_ANON, -1, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + dummy = *p; + } + (void)munmap(p, sz); + ATF_CHECK_EQ(SIGSEGV, last_sig); + ATF_CHECK_EQ(SEGV_ACCERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__bus_objerr_1); +ATF_TC_BODY(page_fault_signal__bus_objerr_1, tc) +{ + int *p; + int fd; + int sz; + + atf_tc_expect_fail("bug 211924"); + sz = getpagesize(); + fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0600); + ATF_REQUIRE(fd != -1); + p = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + *(volatile int *)p = 1; + } + (void)munmap(p, sz); + (void)close(fd); + ATF_CHECK_EQ(SIGBUS, last_sig); + ATF_CHECK_EQ(BUS_OBJERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__bus_objerr_2); +ATF_TC_BODY(page_fault_signal__bus_objerr_2, tc) +{ + int *p; + int fd; + int r; + int sz; + + 
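+	/*
+	 * Map two pages of a one-page shared memory object and touch the
+	 * second page; the fault past the object's end is expected to
+	 * deliver SIGBUS with si_code BUS_OBJERR.
+	 */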
atf_tc_expect_fail("bug 211924"); + sz = getpagesize(); + fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0600); + ATF_REQUIRE(fd != -1); + r = ftruncate(fd, sz); + ATF_REQUIRE(r != -1); + p = mmap(NULL, sz * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + ((volatile int *)p)[sz / sizeof(int)] = 1; + } + (void)munmap(p, sz * 2); + (void)close(fd); + ATF_CHECK_EQ(SIGBUS, last_sig); + ATF_CHECK_EQ(BUS_OBJERR, last_code); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, page_fault_signal__segv_maperr_1); + ATF_TP_ADD_TC(tp, page_fault_signal__segv_accerr_1); + ATF_TP_ADD_TC(tp, page_fault_signal__segv_accerr_2); + ATF_TP_ADD_TC(tp, page_fault_signal__bus_objerr_1); + ATF_TP_ADD_TC(tp, page_fault_signal__bus_objerr_2); + + return (atf_no_error()); +} Property changes on: projects/clang900-import/tests/sys/vm/page_fault_signal.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang900-import/tools/build/options/WITH_GOOGLETEST =================================================================== --- projects/clang900-import/tools/build/options/WITH_GOOGLETEST (nonexistent) +++ projects/clang900-import/tools/build/options/WITH_GOOGLETEST (revision 352537) @@ -0,0 +1,5 @@ +.\" $FreeBSD$ +Set to build and install +.Lb libgmock , +.Lb libgtest , +and dependent tests. Property changes on: projects/clang900-import/tools/build/options/WITH_GOOGLETEST ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang900-import/usr.bin/truss/syscall.h =================================================================== --- projects/clang900-import/usr.bin/truss/syscall.h (revision 352536) +++ projects/clang900-import/usr.bin/truss/syscall.h (revision 352537) @@ -1,276 +1,277 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * System call arguments come in several flavours: * Hex -- values that should be printed in hex (addresses) * Octal -- Same as above, but octal * Int -- normal integer values (file descriptors, for example) * LongHex -- long value that should be printed in hex * Name -- pointer to a NULL-terminated string. * BinString -- pointer to an array of chars, printed via strvisx(). * Ptr -- pointer to some unspecified structure. Just print as hex for now. * Stat -- a pointer to a stat buffer. Prints a couple fields. * Stat11 -- a pointer to a freebsd 11 stat buffer. Prints a couple fields. * StatFs -- a pointer to a statfs buffer. Prints a few fields. * Ioctl -- an ioctl command. Woefully limited. * Quad -- a double-word value. e.g., lseek(int, offset_t, int) * Signal -- a signal number. Prints the signal name (SIGxxx) * Sockaddr -- a pointer to a struct sockaddr. Prints symbolic AF, and IP:Port * StringArray -- a pointer to an array of string pointers. * Timespec -- a pointer to a struct timespec. Prints both elements. * Timeval -- a pointer to a struct timeval. Prints both elements. * Timeval2 -- a pointer to two struct timevals. Prints both elements of both. * Itimerval -- a pointer to a struct itimerval. Prints all elements. * Pollfd -- a pointer to an array of struct pollfd. Prints .fd and .events. * Fd_set -- a pointer to an array of fd_set. Prints the fds that are set. * Sigaction -- a pointer to a struct sigaction. Prints all elements. * Sigset -- a pointer to a sigset_t. Prints the signals that are set. * Sigprocmask -- the first argument to sigprocmask(). Prints the name. * Kevent -- a pointer to an array of struct kevents. Prints all elements. * Pathconf -- the 2nd argument of pathconf(). * Utrace -- utrace(2) buffer. * CapRights -- a pointer to a cap_rights_t. Prints all set capabilities. * * In addition, the pointer types (String, Ptr) may have OUT masked in -- * this means that the data is set on *return* from the system call -- or * IN (meaning that the data is passed *into* the system call). */ enum Argtype { None = 1, /* Scalar integers. */ Socklent, Octal, Int, UInt, Hex, Long, LongHex, Sizet, Quad, QuadHex, /* Encoded scalar values. */ Accessmode, Acltype, Atfd, Atflags, CapFcntlRights, Extattrnamespace, Fadvice, Fcntl, Fcntlflag, FileFlags, Flockop, Getfsstatmode, Idtype, Ioctl, Kldsymcmd, Kldunloadflags, Madvice, Minherit, Msgflags, Mlockall, Mmapflags, Mountflags, Mprot, Msync, Open, Pathconf, Pipe2, Procctl, Priowhich, Ptraceop, Quotactlcmd, Reboothowto, Resource, Rforkflags, Rtpriofunc, RusageWho, Schedpolicy, Shutdown, Signal, Sigprocmask, Sockdomain, Sockoptlevel, Sockoptname, Sockprotocol, Socktype, Sysarch, + Sysctl, Umtxop, Waitoptions, Whence, /* Pointers to non-structures. */ Ptr, BinString, CapRights, ExecArgs, ExecEnv, ExitStatus, Fd_set, IntArray, Iovec, Name, PipeFds, PSig, PQuadHex, PUInt, Readlinkres, ShmName, StringArray, /* Pointers to structures. 
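	 * These are copied out of the traced process with ptrace(PT_IO)
	 * before individual fields are formatted.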
*/ Itimerval, Kevent, Kevent11, LinuxSockArgs, Msghdr, Pollfd, Rlimit, Rusage, Schedparam, Sctpsndrcvinfo, Sigaction, Siginfo, Sigset, Sockaddr, Stat, Stat11, StatFs, Timespec, Timespec2, Timeval, Timeval2, Utrace, CloudABIAdvice, CloudABIClockID, CloudABIFDSFlags, CloudABIFDStat, CloudABIFileStat, CloudABIFileType, CloudABIFSFlags, CloudABILookup, CloudABIMFlags, CloudABIMProt, CloudABIMSFlags, CloudABIOFlags, CloudABISDFlags, CloudABISignal, CloudABISockStat, CloudABISSFlags, CloudABITimestamp, CloudABIULFlags, CloudABIWhence, MAX_ARG_TYPE, }; #define ARG_MASK 0xff #define OUT 0x100 #define IN /*0x20*/0 _Static_assert(ARG_MASK > MAX_ARG_TYPE, "ARG_MASK overlaps with Argtype values"); struct syscall_args { enum Argtype type; int offset; }; struct syscall { STAILQ_ENTRY(syscall) entries; const char *name; u_int ret_type; /* 0, 1, or 2 return values */ u_int nargs; /* actual number of meaningful arguments */ /* Hopefully, no syscalls with > 10 args */ struct syscall_args args[10]; struct timespec time; /* Time spent for this call */ int ncalls; /* Number of calls */ int nerror; /* Number of calls that returned with error */ bool unknown; /* Unknown system call */ }; struct syscall *get_syscall(struct threadinfo *, u_int, u_int); char *print_arg(struct syscall_args *, unsigned long*, register_t *, struct trussinfo *); /* * Linux Socket defines */ #define LINUX_SOCKET 1 #define LINUX_BIND 2 #define LINUX_CONNECT 3 #define LINUX_LISTEN 4 #define LINUX_ACCEPT 5 #define LINUX_GETSOCKNAME 6 #define LINUX_GETPEERNAME 7 #define LINUX_SOCKETPAIR 8 #define LINUX_SEND 9 #define LINUX_RECV 10 #define LINUX_SENDTO 11 #define LINUX_RECVFROM 12 #define LINUX_SHUTDOWN 13 #define LINUX_SETSOCKOPT 14 #define LINUX_GETSOCKOPT 15 #define LINUX_SENDMSG 16 #define LINUX_RECVMSG 17 #define PAD_(t) (sizeof(register_t) <= sizeof(t) ? \ 0 : sizeof(register_t) - sizeof(t)) #if BYTE_ORDER == LITTLE_ENDIAN #define PADL_(t) 0 #define PADR_(t) PAD_(t) #else #define PADL_(t) PAD_(t) #define PADR_(t) 0 #endif typedef int l_int; typedef uint32_t l_ulong; struct linux_socketcall_args { char what_l_[PADL_(l_int)]; l_int what; char what_r_[PADR_(l_int)]; char args_l_[PADL_(l_ulong)]; l_ulong args; char args_r_[PADR_(l_ulong)]; }; void init_syscalls(void); void print_syscall(struct trussinfo *); void print_syscall_ret(struct trussinfo *, int, register_t *); void print_summary(struct trussinfo *trussinfo); Index: projects/clang900-import/usr.bin/truss/syscalls.c =================================================================== --- projects/clang900-import/usr.bin/truss/syscalls.c (revision 352536) +++ projects/clang900-import/usr.bin/truss/syscalls.c (revision 352537) @@ -1,2730 +1,2802 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. 
Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This file has routines used to print out system calls and their * arguments. */ #include #include #define _WANT_FREEBSD11_KEVENT #include #include #include #include #include #include #include #define _WANT_FREEBSD11_STAT #include +#include #include #include #include #include #include #include #include #include #include #define _WANT_KERNEL_ERRNO #include #include #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "extern.h" #include "syscall.h" /* * This should probably be in its own file, sorted alphabetically. */ static struct syscall decoded_syscalls[] = { /* Native ABI */ { .name = "__acl_aclcheck_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_delete_fd", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_file", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_get_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__cap_rights_get", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CapRights | OUT, 2 } } }, { .name = "__getcwd", .ret_type = 1, .nargs = 2, .args = { { Name | OUT, 0 }, { Int, 1 } } }, { .name = "_umtx_op", .ret_type = 1, .nargs = 5, .args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 }, { Ptr, 4 } } }, { .name = "accept", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "access", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { 
Socklent, 2 } } }, { .name = "bindat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "break", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "cap_fcntls_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights | OUT, 1 } } }, { .name = "cap_fcntls_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights, 1 } } }, { .name = "cap_getmode", .ret_type = 1, .nargs = 1, .args = { { PUInt | OUT, 0 } } }, { .name = "cap_rights_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapRights, 1 } } }, { .name = "chdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "chflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "chflagsat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { FileFlags, 2 }, { Atflags, 3 } } }, { .name = "chmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "chown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "chroot", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "clock_gettime", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "compat11.fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat11 | OUT, 1 } } }, { .name = "compat11.fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat11 | OUT, 2 }, { Atflags, 3 } } }, { .name = "compat11.kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent11, 1 }, { Int, 2 }, { Kevent11 | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "compat11.lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "compat11.stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "connectat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "dup2", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "eaccess", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "exit", .ret_type = 0, .nargs = 1, .args = { { Hex, 0 } } }, { .name = "extattr_delete_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_get_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_list_fd", .ret_type = 1, .nargs = 4, 
.args = { { Int, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_file", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_link", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_set_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattrctl", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Hex, 1 }, { Name, 2 }, { Extattrnamespace, 3 }, { Name, 4 } } }, { .name = "faccessat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Accessmode, 2 }, { Atflags, 3 } } }, { .name = "fchflags", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { FileFlags, 1 } } }, { .name = "fchmod", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Octal, 1 } } }, { .name = "fchmodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Atflags, 3 } } }, { .name = "fchown", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "fchownat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Int, 2 }, { Int, 3 }, { Atflags, 4 } } }, { .name = "fcntl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Fcntl, 1 }, { Fcntlflag, 2 } } }, { .name = "fdatasync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "flock", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Flockop, 1 } } }, { .name = "fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat | OUT, 1 } } }, { .name = "fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat | OUT, 2 }, { Atflags, 3 } } }, { .name = "fstatfs", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { StatFs | OUT, 1 } } }, { .name = "fsync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "ftruncate", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { QuadHex | IN, 1 } } }, { .name = "futimens", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec2 | IN, 1 } } }, { .name = "futimes", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timeval2 | IN, 1 } } }, { .name = "futimesat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timeval2 | IN, 2 } } }, { .name = "getdirentries", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { PQuadHex | OUT, 3 } } }, { .name = "getfsstat", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Long, 1 }, { Getfsstatmode, 2 } } }, { .name = "getitimer", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Itimerval | OUT, 2 } } }, { .name = "getpeername", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getpgid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getpriority", .ret_type = 1, .nargs = 2, .args = { { Priowhich, 0 }, { Int, 1 } } }, { .name = "getrandom", .ret_type = 1, .nargs = 3, .args = { { BinString | OUT, 0 }, { Sizet, 1 }, { UInt, 2 } } }, { .name = "getrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { 
Rlimit | OUT, 1 } } }, { .name = "getrusage", .ret_type = 1, .nargs = 2, .args = { { RusageWho, 0 }, { Rusage | OUT, 1 } } }, { .name = "getsid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getsockname", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent, 1 }, { Int, 2 }, { Kevent | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "kill", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { Signal | IN, 1 } } }, { .name = "kldfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldfirstmod", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldload", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldnext", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr, 1 } } }, { .name = "kldsym", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Kldsymcmd, 1 }, { Ptr, 2 } } }, { .name = "kldunload", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldunloadf", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Kldunloadflags, 1 } } }, { .name = "kse_release", .ret_type = 0, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "lchflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "lchmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "lchown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "linkat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 }, { Atflags, 4 } } }, { .name = "listen", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { QuadHex, 1 }, { Whence, 2 } } }, { .name = "lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "lutimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "madvise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Madvice, 2 } } }, { .name = "minherit", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Minherit, 2 } } }, { .name = "mkdir", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkdirat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mkfifo", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkfifoat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mknod", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Int, 2 } } }, { .name = "mknodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Int, 3 } } }, { .name = "mlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "mlockall", .ret_type = 1, .nargs = 1, .args = { { Mlockall, 0 } } }, { .name = "mmap", 
.ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 }, { Mmapflags, 3 }, { Int, 4 }, { QuadHex, 5 } } }, { .name = "modfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "mount", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Name, 1 }, { Mountflags, 2 }, { Ptr, 3 } } }, { .name = "mprotect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 } } }, { .name = "msync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Msync, 2 } } }, { .name = "munlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "munmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "nanosleep", .ret_type = 1, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "nmount", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { UInt, 1 }, { Mountflags, 2 } } }, { .name = "open", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "openat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Open, 2 }, { Octal, 3 } } }, { .name = "pathconf", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Pathconf, 1 } } }, { .name = "pipe", .ret_type = 1, .nargs = 1, .args = { { PipeFds | OUT, 0 } } }, { .name = "pipe2", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Pipe2, 1 } } }, { .name = "poll", .ret_type = 1, .nargs = 3, .args = { { Pollfd, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "posix_fadvise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { QuadHex, 1 }, { QuadHex, 2 }, { Fadvice, 3 } } }, { .name = "posix_openpt", .ret_type = 1, .nargs = 1, .args = { { Open, 0 } } }, { .name = "pread", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "procctl", .ret_type = 1, .nargs = 4, .args = { { Idtype, 0 }, { Quad, 1 }, { Procctl, 2 }, { Ptr, 3 } } }, { .name = "ptrace", .ret_type = 1, .nargs = 4, .args = { { Ptraceop, 0 }, { Int, 1 }, { Ptr, 2 }, { Int, 3 } } }, { .name = "pwrite", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "quotactl", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Quotactlcmd, 1 }, { Int, 2 }, { Ptr, 3 } } }, { .name = "read", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Readlinkres | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlinkat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Readlinkres | OUT, 2 }, { Sizet, 3 } } }, { .name = "readv", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Iovec | OUT, 1 }, { Int, 2 } } }, { .name = "reboot", .ret_type = 1, .nargs = 1, .args = { { Reboothowto, 0 } } }, { .name = "recvfrom", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | OUT, 4 }, { Ptr | OUT, 5 } } }, { .name = "recvmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Msghdr | OUT, 1 }, { Msgflags, 2 } } }, { .name = "rename", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "rmdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "rtprio", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { 
Ptr, 2 } } }, { .name = "rtprio_thread", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "sched_get_priority_max", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_get_priority_min", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_getparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam | OUT, 1 } } }, { .name = "sched_getscheduler", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "sched_rr_get_interval", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "sched_setparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam, 1 } } }, { .name = "sched_setscheduler", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Schedpolicy, 1 }, { Schedparam, 2 } } }, { .name = "sctp_generic_recvmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Iovec | OUT, 1 }, { Int, 2 }, { Sockaddr | OUT, 3 }, { Ptr | OUT, 4 }, { Sctpsndrcvinfo | OUT, 5 }, { Ptr | OUT, 6 } } }, { .name = "sctp_generic_sendmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Sctpsndrcvinfo | IN, 5 }, { Msgflags, 6 } } }, { .name = "sctp_generic_sendmsg_iov", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Iovec | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Sctpsndrcvinfo | IN, 5 }, { Msgflags, 6 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, { .name = "sendmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Msghdr | IN, 1 }, { Msgflags, 2 } } }, { .name = "sendto", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | IN, 4 }, { Socklent | IN, 5 } } }, { .name = "setitimer", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Itimerval, 1 }, { Itimerval | OUT, 2 } } }, { .name = "setpriority", .ret_type = 1, .nargs = 3, .args = { { Priowhich, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "setrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | IN, 1 } } }, { .name = "setsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | IN, 3 }, { Socklent, 4 } } }, { .name = "shm_open", .ret_type = 1, .nargs = 3, .args = { { ShmName | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "shm_unlink", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Shutdown, 1 } } }, { .name = "sigaction", .ret_type = 1, .nargs = 3, .args = { { Signal, 0 }, { Sigaction | IN, 1 }, { Sigaction | OUT, 2 } } }, { .name = "sigpending", .ret_type = 1, .nargs = 1, .args = { { Sigset | OUT, 0 } } }, { .name = "sigprocmask", .ret_type = 1, .nargs = 3, .args = { { Sigprocmask, 0 }, { Sigset, 1 }, { Sigset | OUT, 2 } } }, { .name = "sigqueue", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Signal, 1 }, { LongHex, 2 } } }, { .name = "sigreturn", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "sigsuspend", .ret_type = 1, .nargs = 1, .args = { { Sigset | IN, 0 } } }, { .name = "sigtimedwait", .ret_type = 1, .nargs = 3, .args = { { Sigset | IN, 0 }, { Siginfo | OUT, 1 }, { Timespec | IN, 2 } } }, { .name = "sigwait", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { PSig | OUT, 1 } } }, { .name = "sigwaitinfo", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 
0 }, { Siginfo | OUT, 1 } } }, { .name = "socket", .ret_type = 1, .nargs = 3, .args = { { Sockdomain, 0 }, { Socktype, 1 }, { Sockprotocol, 2 } } }, { .name = "stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "statfs", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { StatFs | OUT, 1 } } }, { .name = "symlink", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "symlinkat", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Atfd, 1 }, { Name, 2 } } }, { .name = "sysarch", .ret_type = 1, .nargs = 2, .args = { { Sysarch, 0 }, { Ptr, 1 } } }, + { .name = "__sysctl", .ret_type = 1, .nargs = 6, + .args = { { Sysctl, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, + { Ptr, 4 }, { Sizet, 5 } } }, + { .name = "__sysctlbyname", .ret_type = 1, .nargs = 6, + .args = { { Name, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, + { Ptr, 4}, { Sizet, 5 } } }, { .name = "thr_kill", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Signal, 1 } } }, { .name = "thr_self", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "thr_set_name", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Name, 1 } } }, { .name = "truncate", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { QuadHex | IN, 1 } } }, #if 0 /* Does not exist */ { .name = "umount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Int, 2 } } }, #endif { .name = "unlink", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "unlinkat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Atflags, 2 } } }, { .name = "unmount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Mountflags, 1 } } }, { .name = "utimensat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timespec2 | IN, 2 }, { Atflags, 3 } } }, { .name = "utimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "utrace", .ret_type = 1, .nargs = 1, .args = { { Utrace, 0 } } }, { .name = "wait4", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { ExitStatus | OUT, 1 }, { Waitoptions, 2 }, { Rusage | OUT, 3 } } }, { .name = "wait6", .ret_type = 1, .nargs = 6, .args = { { Idtype, 0 }, { Quad, 1 }, { ExitStatus | OUT, 2 }, { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Siginfo | OUT, 5 } } }, { .name = "write", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 } } }, { .name = "writev", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Iovec | IN, 1 }, { Int, 2 } } }, /* Linux ABI */ { .name = "linux_access", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Accessmode, 1 } } }, { .name = "linux_execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "linux_lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Whence, 2 } } }, { .name = "linux_mkdir", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Int, 1 } } }, { .name = "linux_newfstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_newstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_open", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Hex, 1 }, { Octal, 2 } } }, { .name = "linux_readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Name | OUT, 1 }, { Sizet, 2 } } }, { .name = "linux_socketcall", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { LinuxSockArgs, 1 } } }, { .name = "linux_stat64", .ret_type = 1, .nargs = 2, .args = { { 
Name | IN, 0 }, { Ptr | OUT, 1 } } }, /* CloudABI system calls. */ { .name = "cloudabi_sys_clock_res_get", .ret_type = 1, .nargs = 1, .args = { { CloudABIClockID, 0 } } }, { .name = "cloudabi_sys_clock_time_get", .ret_type = 1, .nargs = 2, .args = { { CloudABIClockID, 0 }, { CloudABITimestamp, 1 } } }, { .name = "cloudabi_sys_condvar_signal", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 }, { UInt, 2 } } }, { .name = "cloudabi_sys_fd_close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_create1", .ret_type = 1, .nargs = 1, .args = { { CloudABIFileType, 0 } } }, { .name = "cloudabi_sys_fd_create2", .ret_type = 1, .nargs = 2, .args = { { CloudABIFileType, 0 }, { PipeFds | OUT, 0 } } }, { .name = "cloudabi_sys_fd_datasync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_replace", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_fd_seek", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CloudABIWhence, 2 } } }, { .name = "cloudabi_sys_fd_stat_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABIFDStat | OUT, 1 } } }, { .name = "cloudabi_sys_fd_stat_put", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABIFDStat | IN, 1 }, { CloudABIFDSFlags, 2 } } }, { .name = "cloudabi_sys_fd_sync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_file_advise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 }, { CloudABIAdvice, 3 } } }, { .name = "cloudabi_sys_file_allocate", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "cloudabi_sys_file_create", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIFileType, 3 } } }, { .name = "cloudabi_sys_file_link", .ret_type = 1, .nargs = 4, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { Int, 3 }, { BinString | IN, 4 } } }, { .name = "cloudabi_sys_file_open", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIOFlags, 3 }, { CloudABIFDStat | IN, 4 } } }, { .name = "cloudabi_sys_file_readdir", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { Int, 3 } } }, { .name = "cloudabi_sys_file_readlink", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { BinString | OUT, 3 }, { Int, 4 } } }, { .name = "cloudabi_sys_file_rename", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 3 }, { BinString | IN, 4 } } }, { .name = "cloudabi_sys_file_stat_fget", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABIFileStat | OUT, 1 } } }, { .name = "cloudabi_sys_file_stat_fput", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABIFileStat | IN, 1 }, { CloudABIFSFlags, 2 } } }, { .name = "cloudabi_sys_file_stat_get", .ret_type = 1, .nargs = 3, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { CloudABIFileStat | OUT, 3 } } }, { .name = "cloudabi_sys_file_stat_put", .ret_type = 1, .nargs = 4, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { CloudABIFileStat | IN, 3 }, { CloudABIFSFlags, 4 } } }, { .name = "cloudabi_sys_file_symlink", .ret_type = 1, .nargs = 3, .args = { { BinString | IN, 0 }, { Int, 2 }, { BinString | IN, 3 } } }, { .name = "cloudabi_sys_file_unlink", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIULFlags, 3 } } }, { .name 
= "cloudabi_sys_lock_unlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 } } }, { .name = "cloudabi_sys_mem_advise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIAdvice, 2 } } }, { .name = "cloudabi_sys_mem_map", .ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMProt, 2 }, { CloudABIMFlags, 3 }, { Int, 4 }, { Int, 5 } } }, { .name = "cloudabi_sys_mem_protect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMProt, 2 } } }, { .name = "cloudabi_sys_mem_sync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMSFlags, 2 } } }, { .name = "cloudabi_sys_mem_unmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_proc_exec", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { IntArray, 3 }, { Int, 4 } } }, { .name = "cloudabi_sys_proc_exit", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_proc_fork", .ret_type = 1, .nargs = 0 }, { .name = "cloudabi_sys_proc_raise", .ret_type = 1, .nargs = 1, .args = { { CloudABISignal, 0 } } }, { .name = "cloudabi_sys_random_get", .ret_type = 1, .nargs = 2, .args = { { BinString | OUT, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_sock_shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABISDFlags, 1 } } }, { .name = "cloudabi_sys_thread_exit", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 } } }, { .name = "cloudabi_sys_thread_yield", .ret_type = 1, .nargs = 0 }, { .name = 0 }, }; static STAILQ_HEAD(, syscall) syscalls; /* Xlat idea taken from strace */ struct xlat { int val; const char *str; }; #define X(a) { a, #a }, #define XEND { 0, NULL } static struct xlat poll_flags[] = { X(POLLSTANDARD) X(POLLIN) X(POLLPRI) X(POLLOUT) X(POLLERR) X(POLLHUP) X(POLLNVAL) X(POLLRDNORM) X(POLLRDBAND) X(POLLWRBAND) X(POLLINIGNEOF) XEND }; static struct xlat sigaction_flags[] = { X(SA_ONSTACK) X(SA_RESTART) X(SA_RESETHAND) X(SA_NOCLDSTOP) X(SA_NODEFER) X(SA_NOCLDWAIT) X(SA_SIGINFO) XEND }; static struct xlat linux_socketcall_ops[] = { X(LINUX_SOCKET) X(LINUX_BIND) X(LINUX_CONNECT) X(LINUX_LISTEN) X(LINUX_ACCEPT) X(LINUX_GETSOCKNAME) X(LINUX_GETPEERNAME) X(LINUX_SOCKETPAIR) X(LINUX_SEND) X(LINUX_RECV) X(LINUX_SENDTO) X(LINUX_RECVFROM) X(LINUX_SHUTDOWN) X(LINUX_SETSOCKOPT) X(LINUX_GETSOCKOPT) X(LINUX_SENDMSG) X(LINUX_RECVMSG) XEND }; #undef X #define X(a) { CLOUDABI_##a, #a }, static struct xlat cloudabi_advice[] = { X(ADVICE_DONTNEED) X(ADVICE_NOREUSE) X(ADVICE_NORMAL) X(ADVICE_RANDOM) X(ADVICE_SEQUENTIAL) X(ADVICE_WILLNEED) XEND }; static struct xlat cloudabi_clockid[] = { X(CLOCK_MONOTONIC) X(CLOCK_PROCESS_CPUTIME_ID) X(CLOCK_REALTIME) X(CLOCK_THREAD_CPUTIME_ID) XEND }; static struct xlat cloudabi_fdflags[] = { X(FDFLAG_APPEND) X(FDFLAG_DSYNC) X(FDFLAG_NONBLOCK) X(FDFLAG_RSYNC) X(FDFLAG_SYNC) XEND }; static struct xlat cloudabi_fdsflags[] = { X(FDSTAT_FLAGS) X(FDSTAT_RIGHTS) XEND }; static struct xlat cloudabi_filetype[] = { X(FILETYPE_UNKNOWN) X(FILETYPE_BLOCK_DEVICE) X(FILETYPE_CHARACTER_DEVICE) X(FILETYPE_DIRECTORY) X(FILETYPE_PROCESS) X(FILETYPE_REGULAR_FILE) X(FILETYPE_SHARED_MEMORY) X(FILETYPE_SOCKET_DGRAM) X(FILETYPE_SOCKET_STREAM) X(FILETYPE_SYMBOLIC_LINK) XEND }; static struct xlat cloudabi_fsflags[] = { X(FILESTAT_ATIM) X(FILESTAT_ATIM_NOW) X(FILESTAT_MTIM) X(FILESTAT_MTIM_NOW) X(FILESTAT_SIZE) XEND }; static struct xlat cloudabi_mflags[] = { X(MAP_ANON) X(MAP_FIXED) X(MAP_PRIVATE) X(MAP_SHARED) XEND }; static 
struct xlat cloudabi_mprot[] = { X(PROT_EXEC) X(PROT_WRITE) X(PROT_READ) XEND }; static struct xlat cloudabi_msflags[] = { X(MS_ASYNC) X(MS_INVALIDATE) X(MS_SYNC) XEND }; static struct xlat cloudabi_oflags[] = { X(O_CREAT) X(O_DIRECTORY) X(O_EXCL) X(O_TRUNC) XEND }; static struct xlat cloudabi_sdflags[] = { X(SHUT_RD) X(SHUT_WR) XEND }; static struct xlat cloudabi_signal[] = { X(SIGABRT) X(SIGALRM) X(SIGBUS) X(SIGCHLD) X(SIGCONT) X(SIGFPE) X(SIGHUP) X(SIGILL) X(SIGINT) X(SIGKILL) X(SIGPIPE) X(SIGQUIT) X(SIGSEGV) X(SIGSTOP) X(SIGSYS) X(SIGTERM) X(SIGTRAP) X(SIGTSTP) X(SIGTTIN) X(SIGTTOU) X(SIGURG) X(SIGUSR1) X(SIGUSR2) X(SIGVTALRM) X(SIGXCPU) X(SIGXFSZ) XEND }; static struct xlat cloudabi_ulflags[] = { X(UNLINK_REMOVEDIR) XEND }; static struct xlat cloudabi_whence[] = { X(WHENCE_CUR) X(WHENCE_END) X(WHENCE_SET) XEND }; #undef X #undef XEND /* * Searches an xlat array for a value, and returns it if found. Otherwise * return a string representation. */ static const char * lookup(struct xlat *xlat, int val, int base) { static char tmp[16]; for (; xlat->str != NULL; xlat++) if (xlat->val == val) return (xlat->str); switch (base) { case 8: sprintf(tmp, "0%o", val); break; case 16: sprintf(tmp, "0x%x", val); break; case 10: sprintf(tmp, "%u", val); break; default: errx(1,"Unknown lookup base"); break; } return (tmp); } static const char * xlookup(struct xlat *xlat, int val) { return (lookup(xlat, val, 16)); } /* * Searches an xlat array containing bitfield values. Remaining bits * set after removing the known ones are printed at the end: * IN|0x400. */ static char * xlookup_bits(struct xlat *xlat, int val) { int len, rem; static char str[512]; len = 0; rem = val; for (; xlat->str != NULL; xlat++) { if ((xlat->val & rem) == xlat->val) { /* * Don't print the "all-bits-zero" string unless all * bits are really zero. */ if (xlat->val == 0 && val != 0) continue; len += sprintf(str + len, "%s|", xlat->str); rem &= ~(xlat->val); } } /* * If we have leftover bits or didn't match anything, print * the remainder. */ if (rem || len == 0) len += sprintf(str + len, "0x%x", rem); if (len && str[len - 1] == '|') len--; str[len] = 0; return (str); } static void print_integer_arg(const char *(*decoder)(int), FILE *fp, int value) { const char *str; str = decoder(value); if (str != NULL) fputs(str, fp); else fprintf(fp, "%d", value); } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), FILE *fp, int value) { int rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), FILE *fp, uint32_t value) { uint32_t rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } #ifndef __LP64__ /* * Add argument padding to subsequent system calls afater a Quad * syscall arguments as needed. This used to be done by hand in the * decoded_syscalls table which was ugly and error prone. It is * simpler to do the fixup of offsets at initalization time than when * decoding arguments. */ static void quad_fixup(struct syscall *sc) { int offset, prev; u_int i; offset = 0; prev = -1; for (i = 0; i < sc->nargs; i++) { /* This arg type is a dummy that doesn't use offset. 
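		 * PipeFds is filled in from the syscall's return values
		 * rather than from an argument register, so its offset
		 * field is not meaningful and is skipped before the
		 * ordering assertion below.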
*/ if ((sc->args[i].type & ARG_MASK) == PipeFds) continue; assert(prev < sc->args[i].offset); prev = sc->args[i].offset; sc->args[i].offset += offset; switch (sc->args[i].type & ARG_MASK) { case Quad: case QuadHex: #ifdef __powerpc__ /* * 64-bit arguments on 32-bit powerpc must be * 64-bit aligned. If the current offset is * not aligned, the calling convention inserts * a 32-bit pad argument that should be skipped. */ if (sc->args[i].offset % 2 == 1) { sc->args[i].offset++; offset++; } #endif offset++; default: break; } } } #endif void init_syscalls(void) { struct syscall *sc; STAILQ_INIT(&syscalls); for (sc = decoded_syscalls; sc->name != NULL; sc++) { #ifndef __LP64__ quad_fixup(sc); #endif STAILQ_INSERT_HEAD(&syscalls, sc, entries); } } static struct syscall * find_syscall(struct procabi *abi, u_int number) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) return (abi->syscalls[number]); STAILQ_FOREACH(es, &abi->extra_syscalls, entries) { if (es->number == number) return (es->sc); } return (NULL); } static void add_syscall(struct procabi *abi, u_int number, struct syscall *sc) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) { assert(abi->syscalls[number] == NULL); abi->syscalls[number] = sc; } else { es = malloc(sizeof(*es)); es->sc = sc; es->number = number; STAILQ_INSERT_TAIL(&abi->extra_syscalls, es, entries); } } /* * If/when the list gets big, it might be desirable to do it * as a hash table or binary search. */ struct syscall * get_syscall(struct threadinfo *t, u_int number, u_int nargs) { struct syscall *sc; const char *name; char *new_name; u_int i; sc = find_syscall(t->proc->abi, number); if (sc != NULL) return (sc); name = sysdecode_syscallname(t->proc->abi->abi, number); if (name == NULL) { asprintf(&new_name, "#%d", number); name = new_name; } else new_name = NULL; STAILQ_FOREACH(sc, &syscalls, entries) { if (strcmp(name, sc->name) == 0) { add_syscall(t->proc->abi, number, sc); free(new_name); return (sc); } } /* It is unknown. Add it into the list. */ #if DEBUG fprintf(stderr, "unknown syscall %s -- setting args to %d\n", name, nargs); #endif sc = calloc(1, sizeof(struct syscall)); sc->name = name; if (new_name != NULL) sc->unknown = true; sc->ret_type = 1; sc->nargs = nargs; for (i = 0; i < nargs; i++) { sc->args[i].offset = i; /* Treat all unknown arguments as LongHex. */ sc->args[i].type = LongHex; } STAILQ_INSERT_HEAD(&syscalls, sc, entries); add_syscall(t->proc->abi, number, sc); return (sc); } /* * Copy a fixed amount of bytes from the process. */ static int get_struct(pid_t pid, void *offset, void *buf, int len) { struct ptrace_io_desc iorequest; iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = offset; iorequest.piod_addr = buf; iorequest.piod_len = len; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) return (-1); return (0); } #define MAXSIZE 4096 /* * Copy a string from the process. Note that it is * expected to be a C string, but if max is set, it will * only get that much. */ static char * get_string(pid_t pid, void *addr, int max) { struct ptrace_io_desc iorequest; char *buf, *nbuf; size_t offset, size, totalsize; offset = 0; if (max) size = max + 1; else { /* Read up to the end of the current page. 
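The string reader get_string() sizes its first PT_IO request so that it stops at the next page boundary of the traced process, so the initial read never touches the following (possibly unmapped) page, and then grows the buffer in page-sized steps up to MAXSIZE. A standalone sketch of just that size computation, with page size and cap assumed here rather than taken from the system headers:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE_DEMO  4096    /* assumed 4 KiB pages; truss uses PAGE_SIZE */
    #define MAXSIZE_DEMO    4096    /* same role as the MAXSIZE cap above */

    /* First read: stop at the end of the page containing 'addr'. */
    static size_t
    first_read_size(uintptr_t addr)
    {
            size_t size;

            size = PAGE_SIZE_DEMO - (addr % PAGE_SIZE_DEMO);
            if (size > MAXSIZE_DEMO)
                    size = MAXSIZE_DEMO;
            return (size);
    }

    int
    main(void)
    {
            /* 100 bytes left on this page, so only 100 bytes are read first. */
            printf("%zu\n", first_read_size(0x1000f9cUL));  /* prints 100 */
            /* A page-aligned address gets a full page (capped at MAXSIZE). */
            printf("%zu\n", first_read_size(0x1001000UL));  /* prints 4096 */
            return (0);
    }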
*/ size = PAGE_SIZE - ((uintptr_t)addr % PAGE_SIZE); if (size > MAXSIZE) size = MAXSIZE; } totalsize = size; buf = malloc(totalsize); if (buf == NULL) return (NULL); for (;;) { iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (char *)addr + offset; iorequest.piod_addr = buf + offset; iorequest.piod_len = size; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) { free(buf); return (NULL); } if (memchr(buf + offset, '\0', size) != NULL) return (buf); offset += size; if (totalsize < MAXSIZE && max == 0) { size = MAXSIZE - totalsize; if (size > PAGE_SIZE) size = PAGE_SIZE; nbuf = realloc(buf, totalsize + size); if (nbuf == NULL) { buf[totalsize - 1] = '\0'; return (buf); } buf = nbuf; totalsize += size; } else { buf[totalsize - 1] = '\0'; return (buf); } } } static const char * strsig2(int sig) { static char tmp[32]; const char *signame; signame = sysdecode_signal(sig); if (signame == NULL) { snprintf(tmp, sizeof(tmp), "%d", sig); signame = tmp; } return (signame); } static void print_kevent(FILE *fp, struct kevent *ke) { switch (ke->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: case EVFILT_EMPTY: fprintf(fp, "%ju", (uintmax_t)ke->ident); break; case EVFILT_SIGNAL: fputs(strsig2(ke->ident), fp); break; default: fprintf(fp, "%p", (void *)ke->ident); } fprintf(fp, ","); print_integer_arg(sysdecode_kevent_filter, fp, ke->filter); fprintf(fp, ","); print_mask_arg(sysdecode_kevent_flags, fp, ke->flags); fprintf(fp, ","); sysdecode_kevent_fflags(fp, ke->filter, ke->fflags, 16); fprintf(fp, ",%#jx,%p", (uintmax_t)ke->data, ke->udata); } static void print_utrace(FILE *fp, void *utrace_addr, size_t len) { unsigned char *utrace_buffer; fprintf(fp, "{ "); if (sysdecode_utrace(fp, utrace_addr, len)) { fprintf(fp, " }"); return; } utrace_buffer = utrace_addr; fprintf(fp, "%zu:", len); while (len--) fprintf(fp, " %02x", *utrace_buffer++); fprintf(fp, " }"); } static void print_sockaddr(FILE *fp, struct trussinfo *trussinfo, void *arg, socklen_t len) { char addr[64]; struct sockaddr_in *lsin; struct sockaddr_in6 *lsin6; struct sockaddr_un *sun; struct sockaddr *sa; u_char *q; pid_t pid = trussinfo->curthread->proc->pid; if (arg == NULL) { fputs("NULL", fp); return; } /* If the length is too small, just bail. */ if (len < sizeof(*sa)) { fprintf(fp, "%p", arg); return; } sa = calloc(1, len); if (get_struct(pid, arg, sa, len) == -1) { free(sa); fprintf(fp, "%p", arg); return; } switch (sa->sa_family) { case AF_INET: if (len < sizeof(*lsin)) goto sockaddr_short; lsin = (struct sockaddr_in *)(void *)sa; inet_ntop(AF_INET, &lsin->sin_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET %s:%d }", addr, htons(lsin->sin_port)); break; case AF_INET6: if (len < sizeof(*lsin6)) goto sockaddr_short; lsin6 = (struct sockaddr_in6 *)(void *)sa; inet_ntop(AF_INET6, &lsin6->sin6_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET6 [%s]:%d }", addr, htons(lsin6->sin6_port)); break; case AF_UNIX: sun = (struct sockaddr_un *)sa; fprintf(fp, "{ AF_UNIX \"%.*s\" }", (int)(len - offsetof(struct sockaddr_un, sun_path)), sun->sun_path); break; default: sockaddr_short: fprintf(fp, "{ sa_len = %d, sa_family = %d, sa_data = {", (int)sa->sa_len, (int)sa->sa_family); for (q = (u_char *)sa->sa_data; q < (u_char *)sa + len; q++) fprintf(fp, "%s 0x%02x", q == (u_char *)sa->sa_data ? 
"" : ",", *q); fputs(" } }", fp); } free(sa); } #define IOV_LIMIT 16 static void print_iovec(FILE *fp, struct trussinfo *trussinfo, void *arg, int iovcnt) { struct iovec iov[IOV_LIMIT]; size_t max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; size_t len; pid_t pid = trussinfo->curthread->proc->pid; int i; bool buf_truncated, iov_truncated; if (iovcnt <= 0) { fprintf(fp, "%p", arg); return; } if (iovcnt > IOV_LIMIT) { iovcnt = IOV_LIMIT; iov_truncated = true; } else { iov_truncated = false; } if (get_struct(pid, arg, &iov, iovcnt * sizeof(struct iovec)) == -1) { fprintf(fp, "%p", arg); return; } fputs("[", fp); for (i = 0; i < iovcnt; i++) { len = iov[i].iov_len; if (len > max_string) { len = max_string; buf_truncated = true; } else { buf_truncated = false; } fprintf(fp, "%s{", (i > 0) ? "," : ""); if (len && get_struct(pid, iov[i].iov_base, &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= (int)max_string) break; len--; buf_truncated = true; } fprintf(fp, "\"%s\"%s", tmp3, buf_truncated ? "..." : ""); free(tmp3); } else { fprintf(fp, "%p", iov[i].iov_base); } fprintf(fp, ",%zu}", iov[i].iov_len); } fprintf(fp, "%s%s", iov_truncated ? ",..." : "", "]"); } static void print_gen_cmsg(FILE *fp, struct cmsghdr *cmsghdr) { u_char *q; fputs("{", fp); for (q = CMSG_DATA(cmsghdr); q < (u_char *)cmsghdr + cmsghdr->cmsg_len; q++) { fprintf(fp, "%s0x%02x", q == CMSG_DATA(cmsghdr) ? "" : ",", *q); } fputs("}", fp); } static void print_sctp_initmsg(FILE *fp, struct sctp_initmsg *init) { fprintf(fp, "{out=%u,", init->sinit_num_ostreams); fprintf(fp, "in=%u,", init->sinit_max_instreams); fprintf(fp, "max_rtx=%u,", init->sinit_max_attempts); fprintf(fp, "max_rto=%u}", init->sinit_max_init_timeo); } static void print_sctp_sndrcvinfo(FILE *fp, bool receive, struct sctp_sndrcvinfo *info) { fprintf(fp, "{sid=%u,", info->sinfo_stream); if (receive) { fprintf(fp, "ssn=%u,", info->sinfo_ssn); } fputs("flgs=", fp); sysdecode_sctp_sinfo_flags(fp, info->sinfo_flags); fprintf(fp, ",ppid=%u,", ntohl(info->sinfo_ppid)); if (!receive) { fprintf(fp, "ctx=%u,", info->sinfo_context); fprintf(fp, "ttl=%u,", info->sinfo_timetolive); } if (receive) { fprintf(fp, "tsn=%u,", info->sinfo_tsn); fprintf(fp, "cumtsn=%u,", info->sinfo_cumtsn); } fprintf(fp, "id=%u}", info->sinfo_assoc_id); } static void print_sctp_sndinfo(FILE *fp, struct sctp_sndinfo *info) { fprintf(fp, "{sid=%u,", info->snd_sid); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_snd_flags, fp, info->snd_flags); fprintf(fp, ",ppid=%u,", ntohl(info->snd_ppid)); fprintf(fp, "ctx=%u,", info->snd_context); fprintf(fp, "id=%u}", info->snd_assoc_id); } static void print_sctp_rcvinfo(FILE *fp, struct sctp_rcvinfo *info) { fprintf(fp, "{sid=%u,", info->rcv_sid); fprintf(fp, "ssn=%u,", info->rcv_ssn); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_rcv_flags, fp, info->rcv_flags); fprintf(fp, ",ppid=%u,", ntohl(info->rcv_ppid)); fprintf(fp, "tsn=%u,", info->rcv_tsn); fprintf(fp, "cumtsn=%u,", info->rcv_cumtsn); fprintf(fp, "ctx=%u,", info->rcv_context); fprintf(fp, "id=%u}", info->rcv_assoc_id); } static void print_sctp_nxtinfo(FILE *fp, struct sctp_nxtinfo *info) { fprintf(fp, "{sid=%u,", info->nxt_sid); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_nxt_flags, fp, info->nxt_flags); fprintf(fp, ",ppid=%u,", ntohl(info->nxt_ppid)); fprintf(fp, "len=%u,", info->nxt_length); fprintf(fp, "id=%u}", info->nxt_assoc_id); } static void print_sctp_prinfo(FILE *fp, struct sctp_prinfo 
*info) { fputs("{pol=", fp); print_integer_arg(sysdecode_sctp_pr_policy, fp, info->pr_policy); fprintf(fp, ",val=%u}", info->pr_value); } static void print_sctp_authinfo(FILE *fp, struct sctp_authinfo *info) { fprintf(fp, "{num=%u}", info->auth_keynumber); } static void print_sctp_ipv4_addr(FILE *fp, struct in_addr *addr) { char buf[INET_ADDRSTRLEN]; const char *s; s = inet_ntop(AF_INET, addr, buf, INET_ADDRSTRLEN); if (s != NULL) fprintf(fp, "{addr=%s}", s); else fputs("{addr=???}", fp); } static void print_sctp_ipv6_addr(FILE *fp, struct in6_addr *addr) { char buf[INET6_ADDRSTRLEN]; const char *s; s = inet_ntop(AF_INET6, addr, buf, INET6_ADDRSTRLEN); if (s != NULL) fprintf(fp, "{addr=%s}", s); else fputs("{addr=???}", fp); } static void print_sctp_cmsg(FILE *fp, bool receive, struct cmsghdr *cmsghdr) { void *data; socklen_t len; len = cmsghdr->cmsg_len; data = CMSG_DATA(cmsghdr); switch (cmsghdr->cmsg_type) { case SCTP_INIT: if (len == CMSG_LEN(sizeof(struct sctp_initmsg))) print_sctp_initmsg(fp, (struct sctp_initmsg *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_SNDRCV: if (len == CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) print_sctp_sndrcvinfo(fp, receive, (struct sctp_sndrcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; #if 0 case SCTP_EXTRCV: if (len == CMSG_LEN(sizeof(struct sctp_extrcvinfo))) print_sctp_extrcvinfo(fp, (struct sctp_extrcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; #endif case SCTP_SNDINFO: if (len == CMSG_LEN(sizeof(struct sctp_sndinfo))) print_sctp_sndinfo(fp, (struct sctp_sndinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_RCVINFO: if (len == CMSG_LEN(sizeof(struct sctp_rcvinfo))) print_sctp_rcvinfo(fp, (struct sctp_rcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_NXTINFO: if (len == CMSG_LEN(sizeof(struct sctp_nxtinfo))) print_sctp_nxtinfo(fp, (struct sctp_nxtinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_PRINFO: if (len == CMSG_LEN(sizeof(struct sctp_prinfo))) print_sctp_prinfo(fp, (struct sctp_prinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_AUTHINFO: if (len == CMSG_LEN(sizeof(struct sctp_authinfo))) print_sctp_authinfo(fp, (struct sctp_authinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_DSTADDRV4: if (len == CMSG_LEN(sizeof(struct in_addr))) print_sctp_ipv4_addr(fp, (struct in_addr *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_DSTADDRV6: if (len == CMSG_LEN(sizeof(struct in6_addr))) print_sctp_ipv6_addr(fp, (struct in6_addr *)data); else print_gen_cmsg(fp, cmsghdr); break; default: print_gen_cmsg(fp, cmsghdr); } } static void print_cmsgs(FILE *fp, pid_t pid, bool receive, struct msghdr *msghdr) { struct cmsghdr *cmsghdr; char *cmsgbuf; const char *temp; socklen_t len; int level, type; bool first; len = msghdr->msg_controllen; if (len == 0) { fputs("{}", fp); return; } cmsgbuf = calloc(1, len); if (get_struct(pid, msghdr->msg_control, cmsgbuf, len) == -1) { fprintf(fp, "%p", msghdr->msg_control); free(cmsgbuf); return; } msghdr->msg_control = cmsgbuf; first = true; fputs("{", fp); for (cmsghdr = CMSG_FIRSTHDR(msghdr); cmsghdr != NULL; cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr)) { level = cmsghdr->cmsg_level; type = cmsghdr->cmsg_type; len = cmsghdr->cmsg_len; fprintf(fp, "%s{level=", first ? 
"" : ","); print_integer_arg(sysdecode_sockopt_level, fp, level); fputs(",type=", fp); temp = sysdecode_cmsg_type(level, type); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", type); } fputs(",data=", fp); switch (level) { case IPPROTO_SCTP: print_sctp_cmsg(fp, receive, cmsghdr); break; default: print_gen_cmsg(fp, cmsghdr); break; } fputs("}", fp); first = false; } fputs("}", fp); free(cmsgbuf); } +static void +print_sysctl_oid(FILE *fp, int *oid, int len) +{ + int i; + + for (i = 0; i < len; i++) + fprintf(fp, ".%d", oid[i]); +} + /* * Converts a syscall argument into a string. Said string is * allocated via malloc(), so needs to be free()'d. sc is * a pointer to the syscall description (see above); args is * an array of all of the system call arguments. */ char * print_arg(struct syscall_args *sc, unsigned long *args, register_t *retval, struct trussinfo *trussinfo) { FILE *fp; char *tmp; size_t tmplen; pid_t pid; fp = open_memstream(&tmp, &tmplen); pid = trussinfo->curthread->proc->pid; switch (sc->type & ARG_MASK) { case Hex: fprintf(fp, "0x%x", (int)args[sc->offset]); break; case Octal: fprintf(fp, "0%o", (int)args[sc->offset]); break; case Int: fprintf(fp, "%d", (int)args[sc->offset]); break; case UInt: fprintf(fp, "%u", (unsigned int)args[sc->offset]); break; case PUInt: { unsigned int val; if (get_struct(pid, (void *)args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ %u }", val); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case LongHex: fprintf(fp, "0x%lx", args[sc->offset]); break; case Long: fprintf(fp, "%ld", args[sc->offset]); break; case Sizet: fprintf(fp, "%zu", (size_t)args[sc->offset]); break; case ShmName: /* Handle special SHM_ANON value. */ if ((char *)args[sc->offset] == SHM_ANON) { fprintf(fp, "SHM_ANON"); break; } /* FALLTHROUGH */ case Name: { /* NULL-terminated string. */ char *tmp2; tmp2 = get_string(pid, (void*)args[sc->offset], 0); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case BinString: { /* * Binary block of data that might have printable characters. * XXX If type|OUT, assume that the length is the syscall's * return value. Otherwise, assume that the length of the block * is in the next syscall argument. */ int max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; int len; int truncated = 0; if (sc->type & OUT) len = retval[0]; else len = args[sc->offset + 1]; /* * Don't print more than max_string characters, to avoid word * wrap. If we have to truncate put some ... after the string. */ if (len > max_string) { len = max_string; truncated = 1; } if (len && get_struct(pid, (void*)args[sc->offset], &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= max_string) break; len--; truncated = 1; } fprintf(fp, "\"%s\"%s", tmp3, truncated ? "..." : ""); free(tmp3); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case ExecArgs: case ExecEnv: case StringArray: { uintptr_t addr; union { char *strarray[0]; char buf[PAGE_SIZE]; } u; char *string; size_t len; u_int first, i; /* * Only parse argv[] and environment arrays from exec calls * if requested. */ if (((sc->type & ARG_MASK) == ExecArgs && (trussinfo->flags & EXECVEARGS) == 0) || ((sc->type & ARG_MASK) == ExecEnv && (trussinfo->flags & EXECVEENVS) == 0)) { fprintf(fp, "0x%lx", args[sc->offset]); break; } /* * Read a page of pointers at a time. Punt if the top-level * pointer is not aligned. Note that the first read is of * a partial page. 
*/ addr = args[sc->offset]; if (addr % sizeof(char *) != 0) { fprintf(fp, "0x%lx", args[sc->offset]); break; } len = PAGE_SIZE - (addr & PAGE_MASK); if (get_struct(pid, (void *)addr, u.buf, len) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } fputc('[', fp); first = 1; i = 0; while (u.strarray[i] != NULL) { string = get_string(pid, u.strarray[i], 0); fprintf(fp, "%s \"%s\"", first ? "" : ",", string); free(string); first = 0; i++; if (i == len / sizeof(char *)) { addr += len; len = PAGE_SIZE; if (get_struct(pid, (void *)addr, u.buf, len) == -1) { fprintf(fp, ", "); break; } i = 0; } } fputs(" ]", fp); break; } #ifdef __LP64__ case Quad: fprintf(fp, "%ld", args[sc->offset]); break; case QuadHex: fprintf(fp, "0x%lx", args[sc->offset]); break; #else case Quad: case QuadHex: { unsigned long long ll; #if _BYTE_ORDER == _LITTLE_ENDIAN ll = (unsigned long long)args[sc->offset + 1] << 32 | args[sc->offset]; #else ll = (unsigned long long)args[sc->offset] << 32 | args[sc->offset + 1]; #endif if ((sc->type & ARG_MASK) == Quad) fprintf(fp, "%lld", ll); else fprintf(fp, "0x%llx", ll); break; } #endif case PQuadHex: { uint64_t val; if (get_struct(pid, (void *)args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ 0x%jx }", (uintmax_t)val); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Ptr: fprintf(fp, "0x%lx", args[sc->offset]); break; case Readlinkres: { char *tmp2; if (retval[0] == -1) break; tmp2 = get_string(pid, (void*)args[sc->offset], retval[0]); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case Ioctl: { const char *temp; unsigned long cmd; cmd = args[sc->offset]; temp = sysdecode_ioctlname(cmd); if (temp) fputs(temp, fp); else { fprintf(fp, "0x%lx { IO%s%s 0x%lx('%c'), %lu, %lu }", cmd, cmd & IOC_OUT ? "R" : "", cmd & IOC_IN ? "W" : "", IOCGROUP(cmd), isprint(IOCGROUP(cmd)) ? 
(char)IOCGROUP(cmd) : '?', cmd & 0xFF, IOCPARM_LEN(cmd)); } break; } case Timespec: { struct timespec ts; if (get_struct(pid, (void *)args[sc->offset], &ts, sizeof(ts)) != -1) fprintf(fp, "{ %jd.%09ld }", (intmax_t)ts.tv_sec, ts.tv_nsec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timespec2: { struct timespec ts[2]; const char *sep; unsigned int i; if (get_struct(pid, (void *)args[sc->offset], &ts, sizeof(ts)) != -1) { fputs("{ ", fp); sep = ""; for (i = 0; i < nitems(ts); i++) { fputs(sep, fp); sep = ", "; switch (ts[i].tv_nsec) { case UTIME_NOW: fprintf(fp, "UTIME_NOW"); break; case UTIME_OMIT: fprintf(fp, "UTIME_OMIT"); break; default: fprintf(fp, "%jd.%09ld", (intmax_t)ts[i].tv_sec, ts[i].tv_nsec); break; } } fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timeval: { struct timeval tv; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld }", (intmax_t)tv.tv_sec, tv.tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timeval2: { struct timeval tv[2]; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)tv[0].tv_sec, tv[0].tv_usec, (intmax_t)tv[1].tv_sec, tv[1].tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Itimerval: { struct itimerval itv; if (get_struct(pid, (void *)args[sc->offset], &itv, sizeof(itv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)itv.it_interval.tv_sec, itv.it_interval.tv_usec, (intmax_t)itv.it_value.tv_sec, itv.it_value.tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case LinuxSockArgs: { struct linux_socketcall_args largs; if (get_struct(pid, (void *)args[sc->offset], (void *)&largs, sizeof(largs)) != -1) fprintf(fp, "{ %s, 0x%lx }", lookup(linux_socketcall_ops, largs.what, 10), (long unsigned int)largs.args); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Pollfd: { /* * XXX: A Pollfd argument expects the /next/ syscall argument * to be the number of fds in the array. This matches the poll * syscall. */ struct pollfd *pfd; int numfds = args[sc->offset + 1]; size_t bytes = sizeof(struct pollfd) * numfds; int i; if ((pfd = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for pollfd array", bytes); if (get_struct(pid, (void *)args[sc->offset], pfd, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { fprintf(fp, " %d/%s", pfd[i].fd, xlookup_bits(poll_flags, pfd[i].events)); } fputs(" }", fp); } else { fprintf(fp, "0x%lx", args[sc->offset]); } free(pfd); break; } case Fd_set: { /* * XXX: A Fd_set argument expects the /first/ syscall argument * to be the number of fds in the array. This matches the * select syscall. */ fd_set *fds; int numfds = args[0]; size_t bytes = _howmany(numfds, _NFDBITS) * _NFDBITS; int i; if ((fds = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for fd_set array", bytes); if (get_struct(pid, (void *)args[sc->offset], fds, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { if (FD_ISSET(i, fds)) fprintf(fp, " %d", i); } fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); free(fds); break; } case Signal: fputs(strsig2(args[sc->offset]), fp); break; case Sigset: { long sig; sigset_t ss; int i, first; sig = args[sc->offset]; if (get_struct(pid, (void *)args[sc->offset], (void *)&ss, sizeof(ss)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } fputs("{ ", fp); first = 1; for (i = 1; i < sys_nsig; i++) { if (sigismember(&ss, i)) { fprintf(fp, "%s%s", !first ? 
"|" : "", strsig2(i)); first = 0; } } if (!first) fputc(' ', fp); fputc('}', fp); break; } case Sigprocmask: print_integer_arg(sysdecode_sigprocmask_how, fp, args[sc->offset]); break; case Fcntlflag: /* XXX: Output depends on the value of the previous argument. */ if (sysdecode_fcntl_arg_p(args[sc->offset - 1])) sysdecode_fcntl_arg(fp, args[sc->offset - 1], args[sc->offset], 16); break; case Open: print_mask_arg(sysdecode_open_flags, fp, args[sc->offset]); break; case Fcntl: print_integer_arg(sysdecode_fcntl_cmd, fp, args[sc->offset]); break; case Mprot: print_mask_arg(sysdecode_mmap_prot, fp, args[sc->offset]); break; case Mmapflags: print_mask_arg(sysdecode_mmap_flags, fp, args[sc->offset]); break; case Whence: print_integer_arg(sysdecode_whence, fp, args[sc->offset]); break; case Sockdomain: print_integer_arg(sysdecode_socketdomain, fp, args[sc->offset]); break; case Socktype: print_mask_arg(sysdecode_socket_type, fp, args[sc->offset]); break; case Shutdown: print_integer_arg(sysdecode_shutdown_how, fp, args[sc->offset]); break; case Resource: print_integer_arg(sysdecode_rlimit, fp, args[sc->offset]); break; case RusageWho: print_integer_arg(sysdecode_getrusage_who, fp, args[sc->offset]); break; case Pathconf: print_integer_arg(sysdecode_pathconf_name, fp, args[sc->offset]); break; case Rforkflags: print_mask_arg(sysdecode_rfork_flags, fp, args[sc->offset]); break; case Sockaddr: { socklen_t len; if (args[sc->offset] == 0) { fputs("NULL", fp); break; } /* * Extract the address length from the next argument. If * this is an output sockaddr (OUT is set), then the * next argument is a pointer to a socklen_t. Otherwise * the next argument contains a socklen_t by value. */ if (sc->type & OUT) { if (get_struct(pid, (void *)args[sc->offset + 1], &len, sizeof(len)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } } else len = args[sc->offset + 1]; print_sockaddr(fp, trussinfo, (void *)args[sc->offset], len); break; } case Sigaction: { struct sigaction sa; if (get_struct(pid, (void *)args[sc->offset], &sa, sizeof(sa)) != -1) { fputs("{ ", fp); if (sa.sa_handler == SIG_DFL) fputs("SIG_DFL", fp); else if (sa.sa_handler == SIG_IGN) fputs("SIG_IGN", fp); else fprintf(fp, "%p", sa.sa_handler); fprintf(fp, " %s ss_t }", xlookup_bits(sigaction_flags, sa.sa_flags)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Kevent: { /* * XXX XXX: The size of the array is determined by either the * next syscall argument, or by the syscall return value, * depending on which argument number we are. This matches the * kevent syscall, but luckily that's the only syscall that uses * them. 
*/ struct kevent *ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct kevent) * numevents; if ((ke = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke = NULL; if (numevents >= 0 && get_struct(pid, (void *)args[sc->offset], ke, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); print_kevent(fp, &ke[i]); } fputs(" }", fp); } else { fprintf(fp, "0x%lx", args[sc->offset]); } free(ke); break; } case Kevent11: { struct kevent_freebsd11 *ke11; struct kevent ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct kevent_freebsd11) * numevents; if ((ke11 = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke11 = NULL; memset(&ke, 0, sizeof(ke)); if (numevents >= 0 && get_struct(pid, (void *)args[sc->offset], ke11, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); ke.ident = ke11[i].ident; ke.filter = ke11[i].filter; ke.flags = ke11[i].flags; ke.fflags = ke11[i].fflags; ke.data = ke11[i].data; ke.udata = ke11[i].udata; print_kevent(fp, &ke); } fputs(" }", fp); } else { fprintf(fp, "0x%lx", args[sc->offset]); } free(ke11); break; } case Stat: { struct stat st; if (get_struct(pid, (void *)args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case Stat11: { struct freebsd11_stat st; if (get_struct(pid, (void *)args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case StatFs: { unsigned int i; struct statfs buf; if (get_struct(pid, (void *)args[sc->offset], &buf, sizeof(buf)) != -1) { char fsid[17]; bzero(fsid, sizeof(fsid)); if (buf.f_fsid.val[0] != 0 || buf.f_fsid.val[1] != 0) { for (i = 0; i < sizeof(buf.f_fsid); i++) snprintf(&fsid[i*2], sizeof(fsid) - (i*2), "%02x", ((u_char *)&buf.f_fsid)[i]); } fprintf(fp, "{ fstypename=%s,mntonname=%s,mntfromname=%s," "fsid=%s }", buf.f_fstypename, buf.f_mntonname, buf.f_mntfromname, fsid); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Rusage: { struct rusage ru; if (get_struct(pid, (void *)args[sc->offset], &ru, sizeof(ru)) != -1) { fprintf(fp, "{ u=%jd.%06ld,s=%jd.%06ld,in=%ld,out=%ld }", (intmax_t)ru.ru_utime.tv_sec, ru.ru_utime.tv_usec, (intmax_t)ru.ru_stime.tv_sec, ru.ru_stime.tv_usec, ru.ru_inblock, ru.ru_oublock); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Rlimit: { struct rlimit rl; if (get_struct(pid, (void *)args[sc->offset], &rl, sizeof(rl)) != -1) { fprintf(fp, "{ cur=%ju,max=%ju }", rl.rlim_cur, rl.rlim_max); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case ExitStatus: { int status; if (get_struct(pid, (void *)args[sc->offset], &status, sizeof(status)) != -1) { fputs("{ ", fp); if (WIFCONTINUED(status)) fputs("CONTINUED", fp); else if (WIFEXITED(status)) fprintf(fp, "EXITED,val=%d", WEXITSTATUS(status)); else if 
(WIFSIGNALED(status)) fprintf(fp, "SIGNALED,sig=%s%s", strsig2(WTERMSIG(status)), WCOREDUMP(status) ? ",cored" : ""); else fprintf(fp, "STOPPED,sig=%s", strsig2(WTERMSIG(status))); fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Waitoptions: print_mask_arg(sysdecode_wait6_options, fp, args[sc->offset]); break; case Idtype: print_integer_arg(sysdecode_idtype, fp, args[sc->offset]); break; case Procctl: print_integer_arg(sysdecode_procctl_cmd, fp, args[sc->offset]); break; case Umtxop: print_integer_arg(sysdecode_umtx_op, fp, args[sc->offset]); break; case Atfd: print_integer_arg(sysdecode_atfd, fp, args[sc->offset]); break; case Atflags: print_mask_arg(sysdecode_atflags, fp, args[sc->offset]); break; case Accessmode: print_mask_arg(sysdecode_access_mode, fp, args[sc->offset]); break; case Sysarch: print_integer_arg(sysdecode_sysarch_number, fp, args[sc->offset]); break; + case Sysctl: { + char name[BUFSIZ]; + int oid[CTL_MAXNAME + 2], qoid[CTL_MAXNAME + 2]; + size_t i; + int len; + + memset(name, 0, sizeof(name)); + len = args[sc->offset + 1]; + if (get_struct(pid, (void *)args[sc->offset], oid, + len * sizeof(oid[0])) != -1) { + fprintf(fp, "\""); + if (oid[0] == CTL_SYSCTL) { + fprintf(fp, "sysctl."); + switch (oid[1]) { + case CTL_SYSCTL_DEBUG: + fprintf(fp, "debug"); + break; + case CTL_SYSCTL_NAME: + fprintf(fp, "name"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + case CTL_SYSCTL_NEXT: + fprintf(fp, "next"); + break; + case CTL_SYSCTL_NAME2OID: + fprintf(fp, "name2oid"); + break; + case CTL_SYSCTL_OIDFMT: + fprintf(fp, "oidfmt"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + case CTL_SYSCTL_OIDDESCR: + fprintf(fp, "oiddescr"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + case CTL_SYSCTL_OIDLABEL: + fprintf(fp, "oidlabel"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + default: + print_sysctl_oid(fp, oid + 1, len - 1); + } + } else { + qoid[0] = CTL_SYSCTL; + qoid[1] = CTL_SYSCTL_NAME; + memcpy(qoid + 2, oid, len * sizeof(int)); + i = sizeof(name); + if (sysctl(qoid, len + 2, name, &i, 0, 0) == -1) + print_sysctl_oid(fp, qoid + 2, len); + else + fprintf(fp, "%s", name); + } + fprintf(fp, "\""); + } + break; + } case PipeFds: /* * The pipe() system call in the kernel returns its * two file descriptors via return values. However, * the interface exposed by libc is that pipe() * accepts a pointer to an array of descriptors. * Format the output to match the libc API by printing * the returned file descriptors as a fake argument. * * Overwrite the first retval to signal a successful * return as well. */ fprintf(fp, "{ %d, %d }", (int)retval[0], (int)retval[1]); retval[0] = 0; break; case Utrace: { size_t len; void *utrace_addr; len = args[sc->offset + 1]; utrace_addr = calloc(1, len); if (get_struct(pid, (void *)args[sc->offset], (void *)utrace_addr, len) != -1) print_utrace(fp, utrace_addr, len); else fprintf(fp, "0x%lx", args[sc->offset]); free(utrace_addr); break; } case IntArray: { int descriptors[16]; unsigned long i, ndescriptors; bool truncated; ndescriptors = args[sc->offset + 1]; truncated = false; if (ndescriptors > nitems(descriptors)) { ndescriptors = nitems(descriptors); truncated = true; } if (get_struct(pid, (void *)args[sc->offset], descriptors, ndescriptors * sizeof(descriptors[0])) != -1) { fprintf(fp, "{"); for (i = 0; i < ndescriptors; i++) fprintf(fp, i == 0 ? " %d" : ", %d", descriptors[i]); fprintf(fp, truncated ? ", ... 
}" : " }"); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Pipe2: print_mask_arg(sysdecode_pipe2_flags, fp, args[sc->offset]); break; case CapFcntlRights: { uint32_t rights; if (sc->type & OUT) { if (get_struct(pid, (void *)args[sc->offset], &rights, sizeof(rights)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } } else rights = args[sc->offset]; print_mask_arg32(sysdecode_cap_fcntlrights, fp, rights); break; } case Fadvice: print_integer_arg(sysdecode_fadvice, fp, args[sc->offset]); break; case FileFlags: { fflags_t rem; if (!sysdecode_fileflags(fp, args[sc->offset], &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); break; } case Flockop: print_mask_arg(sysdecode_flock_operation, fp, args[sc->offset]); break; case Getfsstatmode: print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; case Kldsymcmd: print_integer_arg(sysdecode_kldsym_cmd, fp, args[sc->offset]); break; case Kldunloadflags: print_integer_arg(sysdecode_kldunload_flags, fp, args[sc->offset]); break; case Madvice: print_integer_arg(sysdecode_madvice, fp, args[sc->offset]); break; case Socklent: fprintf(fp, "%u", (socklen_t)args[sc->offset]); break; case Sockprotocol: { const char *temp; int domain, protocol; domain = args[sc->offset - 2]; protocol = args[sc->offset]; if (protocol == 0) { fputs("0", fp); } else { temp = sysdecode_socket_protocol(domain, protocol); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", protocol); } } break; } case Sockoptlevel: print_integer_arg(sysdecode_sockopt_level, fp, args[sc->offset]); break; case Sockoptname: { const char *temp; int level, name; level = args[sc->offset - 1]; name = args[sc->offset]; temp = sysdecode_sockopt_name(level, name); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", name); } break; } case Msgflags: print_mask_arg(sysdecode_msg_flags, fp, args[sc->offset]); break; case CapRights: { cap_rights_t rights; if (get_struct(pid, (void *)args[sc->offset], &rights, sizeof(rights)) != -1) { fputs("{ ", fp); sysdecode_cap_rights(fp, &rights); fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Acltype: print_integer_arg(sysdecode_acltype, fp, args[sc->offset]); break; case Extattrnamespace: print_integer_arg(sysdecode_extattrnamespace, fp, args[sc->offset]); break; case Minherit: print_integer_arg(sysdecode_minherit_inherit, fp, args[sc->offset]); break; case Mlockall: print_mask_arg(sysdecode_mlockall_flags, fp, args[sc->offset]); break; case Mountflags: print_mask_arg(sysdecode_mount_flags, fp, args[sc->offset]); break; case Msync: print_mask_arg(sysdecode_msync_flags, fp, args[sc->offset]); break; case Priowhich: print_integer_arg(sysdecode_prio_which, fp, args[sc->offset]); break; case Ptraceop: print_integer_arg(sysdecode_ptrace_request, fp, args[sc->offset]); break; case Quotactlcmd: if (!sysdecode_quotactl_cmd(fp, args[sc->offset])) fprintf(fp, "%#x", (int)args[sc->offset]); break; case Reboothowto: print_mask_arg(sysdecode_reboot_howto, fp, args[sc->offset]); break; case Rtpriofunc: print_integer_arg(sysdecode_rtprio_function, fp, args[sc->offset]); break; case Schedpolicy: print_integer_arg(sysdecode_scheduler_policy, fp, args[sc->offset]); break; case Schedparam: { struct sched_param sp; if (get_struct(pid, (void *)args[sc->offset], &sp, sizeof(sp)) != -1) fprintf(fp, "{ %d }", sp.sched_priority); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case PSig: { int sig; if (get_struct(pid, (void *)args[sc->offset], &sig, sizeof(sig)) == 0) fprintf(fp, "{ %s 
}", strsig2(sig)); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Siginfo: { siginfo_t si; if (get_struct(pid, (void *)args[sc->offset], &si, sizeof(si)) != -1) { fprintf(fp, "{ signo=%s", strsig2(si.si_signo)); decode_siginfo(fp, &si); fprintf(fp, " }"); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Iovec: /* * Print argument as an array of struct iovec, where the next * syscall argument is the number of elements of the array. */ print_iovec(fp, trussinfo, (void *)args[sc->offset], (int)args[sc->offset + 1]); break; case Sctpsndrcvinfo: { struct sctp_sndrcvinfo info; if (get_struct(pid, (void *)args[sc->offset], &info, sizeof(struct sctp_sndrcvinfo)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } print_sctp_sndrcvinfo(fp, sc->type & OUT, &info); break; } case Msghdr: { struct msghdr msghdr; if (get_struct(pid, (void *)args[sc->offset], &msghdr, sizeof(struct msghdr)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } fputs("{", fp); print_sockaddr(fp, trussinfo, msghdr.msg_name, msghdr.msg_namelen); fprintf(fp, ",%d,", msghdr.msg_namelen); print_iovec(fp, trussinfo, msghdr.msg_iov, msghdr.msg_iovlen); fprintf(fp, ",%d,", msghdr.msg_iovlen); print_cmsgs(fp, pid, sc->type & OUT, &msghdr); fprintf(fp, ",%u,", msghdr.msg_controllen); print_mask_arg(sysdecode_msg_flags, fp, msghdr.msg_flags); fputs("}", fp); break; } case CloudABIAdvice: fputs(xlookup(cloudabi_advice, args[sc->offset]), fp); break; case CloudABIClockID: fputs(xlookup(cloudabi_clockid, args[sc->offset]), fp); break; case CloudABIFDSFlags: fputs(xlookup_bits(cloudabi_fdsflags, args[sc->offset]), fp); break; case CloudABIFDStat: { cloudabi_fdstat_t fds; if (get_struct(pid, (void *)args[sc->offset], &fds, sizeof(fds)) != -1) { fprintf(fp, "{ %s, ", xlookup(cloudabi_filetype, fds.fs_filetype)); fprintf(fp, "%s, ... 
}", xlookup_bits(cloudabi_fdflags, fds.fs_flags)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIFileStat: { cloudabi_filestat_t fsb; if (get_struct(pid, (void *)args[sc->offset], &fsb, sizeof(fsb)) != -1) fprintf(fp, "{ %s, %ju }", xlookup(cloudabi_filetype, fsb.st_filetype), (uintmax_t)fsb.st_size); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIFileType: fputs(xlookup(cloudabi_filetype, args[sc->offset]), fp); break; case CloudABIFSFlags: fputs(xlookup_bits(cloudabi_fsflags, args[sc->offset]), fp); break; case CloudABILookup: if ((args[sc->offset] & CLOUDABI_LOOKUP_SYMLINK_FOLLOW) != 0) fprintf(fp, "%d|LOOKUP_SYMLINK_FOLLOW", (int)args[sc->offset]); else fprintf(fp, "%d", (int)args[sc->offset]); break; case CloudABIMFlags: fputs(xlookup_bits(cloudabi_mflags, args[sc->offset]), fp); break; case CloudABIMProt: fputs(xlookup_bits(cloudabi_mprot, args[sc->offset]), fp); break; case CloudABIMSFlags: fputs(xlookup_bits(cloudabi_msflags, args[sc->offset]), fp); break; case CloudABIOFlags: fputs(xlookup_bits(cloudabi_oflags, args[sc->offset]), fp); break; case CloudABISDFlags: fputs(xlookup_bits(cloudabi_sdflags, args[sc->offset]), fp); break; case CloudABISignal: fputs(xlookup(cloudabi_signal, args[sc->offset]), fp); break; case CloudABITimestamp: fprintf(fp, "%lu.%09lus", args[sc->offset] / 1000000000, args[sc->offset] % 1000000000); break; case CloudABIULFlags: fputs(xlookup_bits(cloudabi_ulflags, args[sc->offset]), fp); break; case CloudABIWhence: fputs(xlookup(cloudabi_whence, args[sc->offset]), fp); break; default: errx(1, "Invalid argument type %d\n", sc->type & ARG_MASK); } fclose(fp); return (tmp); } /* * Print (to outfile) the system call and its arguments. */ void print_syscall(struct trussinfo *trussinfo) { struct threadinfo *t; const char *name; char **s_args; int i, len, nargs; t = trussinfo->curthread; name = t->cs.sc->name; nargs = t->cs.nargs; s_args = t->cs.s_args; len = print_line_prefix(trussinfo); len += fprintf(trussinfo->outfile, "%s(", name); for (i = 0; i < nargs; i++) { if (s_args[i] != NULL) len += fprintf(trussinfo->outfile, "%s", s_args[i]); else len += fprintf(trussinfo->outfile, ""); len += fprintf(trussinfo->outfile, "%s", i < (nargs - 1) ? "," : ""); } len += fprintf(trussinfo->outfile, ")"); for (i = 0; i < 6 - (len / 8); i++) fprintf(trussinfo->outfile, "\t"); } void print_syscall_ret(struct trussinfo *trussinfo, int error, register_t *retval) { struct timespec timediff; struct threadinfo *t; struct syscall *sc; t = trussinfo->curthread; sc = t->cs.sc; if (trussinfo->flags & COUNTONLY) { timespecsub(&t->after, &t->before, &timediff); timespecadd(&sc->time, &timediff, &sc->time); sc->ncalls++; if (error != 0) sc->nerror++; return; } print_syscall(trussinfo); fflush(trussinfo->outfile); if (retval == NULL) { /* * This system call resulted in the current thread's exit, * so there is no return value or error to display. 
*/ fprintf(trussinfo->outfile, "\n"); return; } if (error == ERESTART) fprintf(trussinfo->outfile, " ERESTART\n"); else if (error == EJUSTRETURN) fprintf(trussinfo->outfile, " EJUSTRETURN\n"); else if (error != 0) { fprintf(trussinfo->outfile, " ERR#%d '%s'\n", sysdecode_freebsd_to_abi_errno(t->proc->abi->abi, error), strerror(error)); } #ifndef __LP64__ else if (sc->ret_type == 2) { off_t off; #if _BYTE_ORDER == _LITTLE_ENDIAN off = (off_t)retval[1] << 32 | retval[0]; #else off = (off_t)retval[0] << 32 | retval[1]; #endif fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)off, (intmax_t)off); } #endif else fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)retval[0], (intmax_t)retval[0]); } void print_summary(struct trussinfo *trussinfo) { struct timespec total = {0, 0}; struct syscall *sc; int ncall, nerror; fprintf(trussinfo->outfile, "%-20s%15s%8s%8s\n", "syscall", "seconds", "calls", "errors"); ncall = nerror = 0; STAILQ_FOREACH(sc, &syscalls, entries) if (sc->ncalls) { fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", sc->name, (intmax_t)sc->time.tv_sec, sc->time.tv_nsec, sc->ncalls, sc->nerror); timespecadd(&total, &sc->time, &total); ncall += sc->ncalls; nerror += sc->nerror; } fprintf(trussinfo->outfile, "%20s%15s%8s%8s\n", "", "-------------", "-------", "-------"); fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", "", (intmax_t)total.tv_sec, total.tv_nsec, ncall, nerror); } Index: projects/clang900-import/usr.sbin/freebsd-update/freebsd-update.8 =================================================================== --- projects/clang900-import/usr.sbin/freebsd-update/freebsd-update.8 (revision 352536) +++ projects/clang900-import/usr.sbin/freebsd-update/freebsd-update.8 (revision 352537) @@ -1,200 +1,200 @@ .\"- .\" Copyright 2006, 2007 Colin Percival .\" All rights reserved .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted providing that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED .\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY .\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd June 14, 2017 .Dt FREEBSD-UPDATE 8 .Os .Sh NAME .Nm freebsd-update .Nd fetch and install binary updates to FreeBSD .Sh SYNOPSIS .Nm .Op Fl b Ar basedir .Op Fl d Ar workdir .Op Fl f Ar conffile .Op Fl F .Op Fl k Ar KEY .Op Fl r Ar newrelease .Op Fl s Ar server .Op Fl t Ar address .Op Fl -not-running-from-cron .Cm command ... 
.Sh DESCRIPTION The .Nm tool is used to fetch, install, and rollback binary updates to the .Fx base system. Note that updates are only available if they are being built for the .Fx release and architecture being used; in particular, the .Fx Security Team only builds updates for releases shipped in binary form by the .Fx Release Engineering Team, e.g., .Fx 11.2-RELEASE and .Fx 12.0-RELEASE, but not .Fx 11.2-STABLE or .Fx 13.0-CURRENT. .Sh OPTIONS The following options are supported: .Bl -tag -width "-r newrelease" .It Fl b Ar basedir Operate on a system mounted at .Ar basedir . (default: .Pa / , or as given in the configuration file.) .It Fl d Ar workdir Store working files in .Ar workdir . (default: .Pa /var/db/freebsd-update/ , or as given in the configuration file.) .It Fl f Ar conffile Read configuration options from .Ar conffile . (default: .Pa /etc/freebsd-update.conf ) .It Fl F Force .Nm Cm fetch to proceed in the case of an unfinished upgrade. .It Fl k Ar KEY Trust an RSA key with SHA256 of .Ar KEY . (default: read value from configuration file.) .It Fl r Ar newrelease -Specify the new release (e.g. 11.2-RELEASE) to which +Specify the new release (e.g., 11.2-RELEASE) to which .Nm should upgrade (upgrade command only). .It Fl s Ar server Fetch files from the specified server or server pool. (default: read value from configuration file.) .It Fl t Ar address Mail output of .Cm cron command, if any, to .Ar address . (default: root, or as given in the configuration file.) .It Fl -not-running-from-cron Force .Nm Cm fetch to proceed when there is no controlling tty. This is for use by automated scripts and orchestration tools. Please do not run .Nm Cm fetch from crontab or similar using this flag, see: .Nm Cm cron .It Fl -currently-running Ar release Do not detect the currently-running release; instead, assume that the system is running the specified .Ar release . This is most likely to be useful when upgrading jails. .El .Sh COMMANDS The .Cm command can be any one of the following: .Bl -tag -width "rollback" .It Cm fetch Based on the currently installed world and the configuration options set, fetch all available binary updates. .It Cm cron Sleep a random amount of time between 1 and 3600 seconds, then download updates as if the .Cm fetch command was used. If updates are downloaded, an email will be sent (to root or a different address if specified via the .Fl t option or in the configuration file). As the name suggests, this command is designed for running from .Xr cron 8 ; the random delay serves to minimize the probability that a large number of machines will simultaneously attempt to fetch updates. .It Cm upgrade Fetch files necessary for upgrading to a new release. Before using this command, make sure that you read the announcement and release notes for the new release in case there are any special steps needed for upgrading. Note that this command may require up to 500 MB of space in .Ar workdir depending on which components of the .Fx base system are installed. .It Cm install Install the most recently fetched updates or upgrade. .It Cm rollback Uninstall the most recently installed updates. .It Cm IDS Compare the system against a "known good" index of the installed release. .El .Sh TIPS .Bl -bullet .It If your clock is set to local time, adding the line .Pp .Dl 0 3 * * * root /usr/sbin/freebsd-update cron .Pp to /etc/crontab will check for updates every night. 
If your clock is set to UTC, please pick a random time other than 3AM, to avoid overly imposing an uneven load on the server(s) hosting the updates. .It In spite of its name, .Nm IDS should not be relied upon as an "Intrusion Detection System", since if the system has been tampered with it cannot be trusted to operate correctly. If you intend to use this command for intrusion-detection purposes, make sure you boot from a secure disk (e.g., a CD). .El .Sh FILES .Bl -tag -width "/etc/freebsd-update.conf" .It Pa /etc/freebsd-update.conf Default location of the .Nm configuration file. .It Pa /var/db/freebsd-update/ Default location where .Nm stores temporary files and downloaded updates. .El .Sh SEE ALSO .Xr freebsd-update.conf 5 .Sh AUTHORS .An Colin Percival Aq Mt cperciva@FreeBSD.org Index: projects/clang900-import/usr.sbin/freebsd-update/freebsd-update.sh =================================================================== --- projects/clang900-import/usr.sbin/freebsd-update/freebsd-update.sh (revision 352536) +++ projects/clang900-import/usr.sbin/freebsd-update/freebsd-update.sh (revision 352537) @@ -1,3361 +1,3361 @@ #!/bin/sh #- # SPDX-License-Identifier: BSD-2-Clause-FreeBSD # # Copyright 2004-2007 Colin Percival # All rights reserved # # Redistribution and use in source and binary forms, with or without # modification, are permitted providing that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # $FreeBSD$ #### Usage function -- called from command-line handling code. # Usage instructions. Options not listed: # --debug -- don't filter output from utilities # --no-stats -- don't show progress statistics while fetching files usage () { cat < ${LINE}" exit 1 fi done < ${CONFFILE} # Merge the settings read from the configuration file with those # provided at the command line. 
mergeconfig } # Provide some default parameters default_params () { # Save any parameters already configured, and clear the slate saveconfig nullconfig # Default configurations config_WorkDir /var/db/freebsd-update config_MailTo root config_AllowAdd yes config_AllowDelete yes config_KeepModifiedMetadata yes config_BaseDir / config_VerboseLevel stats config_StrictComponents no config_BackupKernel yes config_BackupKernelDir /boot/kernel.old config_BackupKernelSymbolFiles no # Merge these defaults into the earlier-configured settings mergeconfig } # Set utility output filtering options, based on ${VERBOSELEVEL} fetch_setup_verboselevel () { case ${VERBOSELEVEL} in debug) QUIETREDIR="/dev/stderr" QUIETFLAG=" " STATSREDIR="/dev/stderr" DDSTATS=".." XARGST="-t" NDEBUG=" " ;; nostats) QUIETREDIR="" QUIETFLAG="" STATSREDIR="/dev/null" DDSTATS=".." XARGST="" NDEBUG="" ;; stats) QUIETREDIR="/dev/null" QUIETFLAG="-q" STATSREDIR="/dev/stdout" DDSTATS="" XARGST="" NDEBUG="-n" ;; esac } # Perform sanity checks and set some final parameters # in preparation for fetching files. Figure out which # set of updates should be downloaded: If the user is # running *-p[0-9]+, strip off the last part; if the # user is running -SECURITY, call it -RELEASE. Chdir # into the working directory. fetchupgrade_check_params () { export HTTP_USER_AGENT="freebsd-update (${COMMAND}, `uname -r`)" _SERVERNAME_z=\ "SERVERNAME must be given via command line or configuration file." _KEYPRINT_z="Key must be given via -k option or configuration file." _KEYPRINT_bad="Invalid key fingerprint: " _WORKDIR_bad="Directory does not exist or is not writable: " _WORKDIR_bad2="Directory is not on a persistent filesystem: " if [ -z "${SERVERNAME}" ]; then echo -n "`basename $0`: " echo "${_SERVERNAME_z}" exit 1 fi if [ -z "${KEYPRINT}" ]; then echo -n "`basename $0`: " echo "${_KEYPRINT_z}" exit 1 fi if ! echo "${KEYPRINT}" | grep -qE "^[0-9a-f]{64}$"; then echo -n "`basename $0`: " echo -n "${_KEYPRINT_bad}" echo ${KEYPRINT} exit 1 fi if ! [ -d "${WORKDIR}" -a -w "${WORKDIR}" ]; then echo -n "`basename $0`: " echo -n "${_WORKDIR_bad}" echo ${WORKDIR} exit 1 fi case `df -T ${WORKDIR}` in */dev/md[0-9]* | *tmpfs*) echo -n "`basename $0`: " echo -n "${_WORKDIR_bad2}" echo ${WORKDIR} exit 1 ;; esac chmod 700 ${WORKDIR} cd ${WORKDIR} || exit 1 # Generate release number. The s/SECURITY/RELEASE/ bit exists # to provide an upgrade path for FreeBSD Update 1.x users, since # the kernels provided by FreeBSD Update 1.x are always labelled # as X.Y-SECURITY. RELNUM=`uname -r | sed -E 's,-p[0-9]+,,' | sed -E 's,-SECURITY,-RELEASE,'` ARCH=`uname -m` FETCHDIR=${RELNUM}/${ARCH} PATCHDIR=${RELNUM}/${ARCH}/bp # Disallow upgrade from a version that is not a release case ${RELNUM} in *-RELEASE | *-ALPHA* | *-BETA* | *-RC*) ;; *) echo -n "`basename $0`: " cat <<- EOF Cannot upgrade from a version that is not a release (including alpha, beta and release candidates) using `basename $0`. Instead, FreeBSD can be directly upgraded by source or upgraded to a RELEASE/RELENG version prior to running `basename $0`. Currently running: ${RELNUM} EOF exit 1 ;; esac # Figure out what directory contains the running kernel BOOTFILE=`sysctl -n kern.bootfile` KERNELDIR=${BOOTFILE%/kernel} if ! [ -d ${KERNELDIR} ]; then echo "Cannot identify running kernel" exit 1 fi # Figure out what kernel configuration is running. We start with # the output of `uname -i`, and then make the following adjustments: # 1. Replace "SMP-GENERIC" with "SMP". 
Why the SMP kernel config # file says "ident SMP-GENERIC", I don't know... # 2. If the kernel claims to be GENERIC _and_ ${ARCH} is "amd64" # _and_ `sysctl kern.version` contains a line which ends "/SMP", then # we're running an SMP kernel. This mis-identification is a bug # which was fixed in 6.2-STABLE. KERNCONF=`uname -i` if [ ${KERNCONF} = "SMP-GENERIC" ]; then KERNCONF=SMP fi if [ ${KERNCONF} = "GENERIC" ] && [ ${ARCH} = "amd64" ]; then if sysctl kern.version | grep -qE '/SMP$'; then KERNCONF=SMP fi fi # Define some paths BSPATCH=/usr/bin/bspatch SHA256=/sbin/sha256 PHTTPGET=/usr/libexec/phttpget # Set up variables relating to VERBOSELEVEL fetch_setup_verboselevel # Construct a unique name from ${BASEDIR} BDHASH=`echo ${BASEDIR} | sha256 -q` } # Perform sanity checks etc. before fetching updates. fetch_check_params () { fetchupgrade_check_params if ! [ -z "${TARGETRELEASE}" ]; then echo -n "`basename $0`: " echo -n "-r option is meaningless with 'fetch' command. " echo "(Did you mean 'upgrade' instead?)" exit 1 fi # Check that we have updates ready to install if [ -f ${BDHASH}-install/kerneldone -a $FORCEFETCH -eq 0 ]; then echo "You have a partially completed upgrade pending" echo "Run '$0 install' first." echo "Run '$0 fetch -F' to proceed anyway." exit 1 fi } # Perform sanity checks etc. before fetching upgrades. upgrade_check_params () { fetchupgrade_check_params # Unless set otherwise, we're upgrading to the same kernel config. NKERNCONF=${KERNCONF} # We need TARGETRELEASE set _TARGETRELEASE_z="Release target must be specified via -r option." if [ -z "${TARGETRELEASE}" ]; then echo -n "`basename $0`: " echo "${_TARGETRELEASE_z}" exit 1 fi # The target release should be != the current release. if [ "${TARGETRELEASE}" = "${RELNUM}" ]; then echo -n "`basename $0`: " echo "Cannot upgrade from ${RELNUM} to itself" exit 1 fi # Turning off AllowAdd or AllowDelete is a bad idea for upgrades. if [ "${ALLOWADD}" = "no" ]; then echo -n "`basename $0`: " echo -n "WARNING: \"AllowAdd no\" is a bad idea " echo "when upgrading between releases." echo fi if [ "${ALLOWDELETE}" = "no" ]; then echo -n "`basename $0`: " echo -n "WARNING: \"AllowDelete no\" is a bad idea " echo "when upgrading between releases." echo fi # Set EDITOR to /usr/bin/vi if it isn't already set : ${EDITOR:='/usr/bin/vi'} } # Perform sanity checks and set some final parameters in # preparation for installing updates. install_check_params () { # Check that we are root. All sorts of things won't work otherwise. if [ `id -u` != 0 ]; then echo "You must be root to run this." exit 1 fi # Check that securelevel <= 0. Otherwise we can't update schg files. if [ `sysctl -n kern.securelevel` -gt 0 ]; then echo "Updates cannot be installed when the system securelevel" echo "is greater than zero." exit 1 fi # Check that we have a working directory _WORKDIR_bad="Directory does not exist or is not writable: " if ! [ -d "${WORKDIR}" -a -w "${WORKDIR}" ]; then echo -n "`basename $0`: " echo -n "${_WORKDIR_bad}" echo ${WORKDIR} exit 1 fi cd ${WORKDIR} || exit 1 # Construct a unique name from ${BASEDIR} BDHASH=`echo ${BASEDIR} | sha256 -q` # Check that we have updates ready to install if ! [ -L ${BDHASH}-install ]; then echo "No updates are available to install." if [ $ISFETCHED -eq 0 ]; then echo "Run '$0 fetch' first." exit 1 fi exit 0 fi if ! [ -f ${BDHASH}-install/INDEX-OLD ] || ! [ -f ${BDHASH}-install/INDEX-NEW ]; then echo "Update manifest is corrupt -- this should never happen." echo "Re-run '$0 fetch'." 
exit 1 fi # Figure out what directory contains the running kernel BOOTFILE=`sysctl -n kern.bootfile` KERNELDIR=${BOOTFILE%/kernel} if ! [ -d ${KERNELDIR} ]; then echo "Cannot identify running kernel" exit 1 fi } # Perform sanity checks and set some final parameters in # preparation for UNinstalling updates. rollback_check_params () { # Check that we are root. All sorts of things won't work otherwise. if [ `id -u` != 0 ]; then echo "You must be root to run this." exit 1 fi # Check that we have a working directory _WORKDIR_bad="Directory does not exist or is not writable: " if ! [ -d "${WORKDIR}" -a -w "${WORKDIR}" ]; then echo -n "`basename $0`: " echo -n "${_WORKDIR_bad}" echo ${WORKDIR} exit 1 fi cd ${WORKDIR} || exit 1 # Construct a unique name from ${BASEDIR} BDHASH=`echo ${BASEDIR} | sha256 -q` # Check that we have updates ready to rollback if ! [ -L ${BDHASH}-rollback ]; then echo "No rollback directory found." exit 1 fi if ! [ -f ${BDHASH}-rollback/INDEX-OLD ] || ! [ -f ${BDHASH}-rollback/INDEX-NEW ]; then echo "Update manifest is corrupt -- this should never happen." exit 1 fi } # Perform sanity checks and set some final parameters # in preparation for comparing the system against the # published index. Figure out which index we should # compare against: If the user is running *-p[0-9]+, # strip off the last part; if the user is running # -SECURITY, call it -RELEASE. Chdir into the working # directory. IDS_check_params () { export HTTP_USER_AGENT="freebsd-update (${COMMAND}, `uname -r`)" _SERVERNAME_z=\ "SERVERNAME must be given via command line or configuration file." _KEYPRINT_z="Key must be given via -k option or configuration file." _KEYPRINT_bad="Invalid key fingerprint: " _WORKDIR_bad="Directory does not exist or is not writable: " if [ -z "${SERVERNAME}" ]; then echo -n "`basename $0`: " echo "${_SERVERNAME_z}" exit 1 fi if [ -z "${KEYPRINT}" ]; then echo -n "`basename $0`: " echo "${_KEYPRINT_z}" exit 1 fi if ! echo "${KEYPRINT}" | grep -qE "^[0-9a-f]{64}$"; then echo -n "`basename $0`: " echo -n "${_KEYPRINT_bad}" echo ${KEYPRINT} exit 1 fi if ! [ -d "${WORKDIR}" -a -w "${WORKDIR}" ]; then echo -n "`basename $0`: " echo -n "${_WORKDIR_bad}" echo ${WORKDIR} exit 1 fi cd ${WORKDIR} || exit 1 # Generate release number. The s/SECURITY/RELEASE/ bit exists # to provide an upgrade path for FreeBSD Update 1.x users, since # the kernels provided by FreeBSD Update 1.x are always labelled # as X.Y-SECURITY. RELNUM=`uname -r | sed -E 's,-p[0-9]+,,' | sed -E 's,-SECURITY,-RELEASE,'` ARCH=`uname -m` FETCHDIR=${RELNUM}/${ARCH} PATCHDIR=${RELNUM}/${ARCH}/bp # Figure out what directory contains the running kernel BOOTFILE=`sysctl -n kern.bootfile` KERNELDIR=${BOOTFILE%/kernel} if ! [ -d ${KERNELDIR} ]; then echo "Cannot identify running kernel" exit 1 fi # Figure out what kernel configuration is running. We start with # the output of `uname -i`, and then make the following adjustments: # 1. Replace "SMP-GENERIC" with "SMP". Why the SMP kernel config # file says "ident SMP-GENERIC", I don't know... # 2. If the kernel claims to be GENERIC _and_ ${ARCH} is "amd64" # _and_ `sysctl kern.version` contains a line which ends "/SMP", then # we're running an SMP kernel. This mis-identification is a bug # which was fixed in 6.2-STABLE. 
KERNCONF=`uname -i` if [ ${KERNCONF} = "SMP-GENERIC" ]; then KERNCONF=SMP fi if [ ${KERNCONF} = "GENERIC" ] && [ ${ARCH} = "amd64" ]; then if sysctl kern.version | grep -qE '/SMP$'; then KERNCONF=SMP fi fi # Define some paths SHA256=/sbin/sha256 PHTTPGET=/usr/libexec/phttpget # Set up variables relating to VERBOSELEVEL fetch_setup_verboselevel } #### Core functionality -- the actual work gets done here # Use an SRV query to pick a server. If the SRV query doesn't provide # a useful answer, use the server name specified by the user. # Put another way... look up _http._tcp.${SERVERNAME} and pick a server # from that; or if no servers are returned, use ${SERVERNAME}. # This allows a user to specify "portsnap.freebsd.org" (in which case # portsnap will select one of the mirrors) or "portsnap5.tld.freebsd.org" # (in which case portsnap will use that particular server, since there # won't be an SRV entry for that name). # # We ignore the Port field, since we are always going to use port 80. # Fetch the mirror list, but do not pick a mirror yet. Returns 1 if # no mirrors are available for any reason. fetch_pick_server_init () { : > serverlist_tried # Check that host(1) exists (i.e., that the system wasn't built with the # WITHOUT_BIND set) and don't try to find a mirror if it doesn't exist. if ! which -s host; then : > serverlist_full return 1 fi echo -n "Looking up ${SERVERNAME} mirrors... " # Issue the SRV query and pull out the Priority, Weight, and Target fields. # BIND 9 prints "$name has SRV record ..." while BIND 8 prints # "$name server selection ..."; we allow either format. MLIST="_http._tcp.${SERVERNAME}" host -t srv "${MLIST}" | sed -nE "s/${MLIST} (has SRV record|server selection) //Ip" | cut -f 1,2,4 -d ' ' | sed -e 's/\.$//' | sort > serverlist_full # If no records, give up -- we'll just use the server name we were given. if [ `wc -l < serverlist_full` -eq 0 ]; then echo "none found." return 1 fi # Report how many mirrors we found. echo `wc -l < serverlist_full` "mirrors found." # Generate a random seed for use in picking mirrors. If HTTP_PROXY # is set, this will be used to generate the seed; otherwise, the seed # will be random. if [ -n "${HTTP_PROXY}${http_proxy}" ]; then RANDVALUE=`sha256 -qs "${HTTP_PROXY}${http_proxy}" | tr -d 'a-f' | cut -c 1-9` else RANDVALUE=`jot -r 1 0 999999999` fi } # Pick a mirror. Returns 1 if we have run out of mirrors to try. fetch_pick_server () { # Generate a list of not-yet-tried mirrors sort serverlist_tried | comm -23 serverlist_full - > serverlist # Have we run out of mirrors? if [ `wc -l < serverlist` -eq 0 ]; then cat <<- EOF No mirrors remaining, giving up. This may be because upgrading from this platform (${ARCH}) or release (${RELNUM}) is unsupported by `basename $0`. Only platforms with Tier 1 support can be upgraded by `basename $0`. See https://www.freebsd.org/platforms/index.html for more info. If unsupported, FreeBSD must be upgraded by source. EOF return 1 fi # Find the highest priority level (lowest numeric value). SRV_PRIORITY=`cut -f 1 -d ' ' serverlist | sort -n | head -1` # Add up the weights of the response lines at that priority level. SRV_WSUM=0; while read X; do case "$X" in ${SRV_PRIORITY}\ *) SRV_W=`echo $X | cut -f 2 -d ' '` SRV_WSUM=$(($SRV_WSUM + $SRV_W)) ;; esac done < serverlist # If all the weights are 0, pretend that they are all 1 instead. 
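	# (Worked example with hypothetical SRV data: three mirrors at the
	# chosen priority carrying weights 40, 40 and 20 give SRV_WSUM=100,
	# so SRV_RND below falls in 0..99; the selection loop then picks
	# the first mirror for 0..39, the second for 40..79 and the third
	# for 80..99, i.e. mirrors are chosen in proportion to their weights.)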
if [ ${SRV_WSUM} -eq 0 ]; then SRV_WSUM=`grep -E "^${SRV_PRIORITY} " serverlist | wc -l` SRV_W_ADD=1 else SRV_W_ADD=0 fi # Pick a value between 0 and the sum of the weights - 1 SRV_RND=`expr ${RANDVALUE} % ${SRV_WSUM}` # Read through the list of mirrors and set SERVERNAME. Write the line # corresponding to the mirror we selected into serverlist_tried so that # we won't try it again. while read X; do case "$X" in ${SRV_PRIORITY}\ *) SRV_W=`echo $X | cut -f 2 -d ' '` SRV_W=$(($SRV_W + $SRV_W_ADD)) if [ $SRV_RND -lt $SRV_W ]; then SERVERNAME=`echo $X | cut -f 3 -d ' '` echo "$X" >> serverlist_tried break else SRV_RND=$(($SRV_RND - $SRV_W)) fi ;; esac done < serverlist } # Take a list of ${oldhash}|${newhash} and output a list of needed patches, # i.e., those for which we have ${oldhash} and don't have ${newhash}. fetch_make_patchlist () { grep -vE "^([0-9a-f]{64})\|\1$" | tr '|' ' ' | while read X Y; do if [ -f "files/${Y}.gz" ] || [ ! -f "files/${X}.gz" ]; then continue fi echo "${X}|${Y}" done | sort -u } # Print user-friendly progress statistics fetch_progress () { LNC=0 while read x; do LNC=$(($LNC + 1)) if [ $(($LNC % 10)) = 0 ]; then echo -n $LNC elif [ $(($LNC % 2)) = 0 ]; then echo -n . fi done echo -n " " } # Function for asking the user if everything is ok continuep () { while read -p "Does this look reasonable (y/n)? " CONTINUE; do case "${CONTINUE}" in y*) return 0 ;; n*) return 1 ;; esac done } # Initialize the working directory workdir_init () { mkdir -p files touch tINDEX.present } # Check that we have a public key with an appropriate hash, or # fetch the key if it doesn't exist. Returns 1 if the key has # not yet been fetched. fetch_key () { if [ -r pub.ssl ] && [ `${SHA256} -q pub.ssl` = ${KEYPRINT} ]; then return 0 fi echo -n "Fetching public key from ${SERVERNAME}... " rm -f pub.ssl fetch ${QUIETFLAG} http://${SERVERNAME}/${FETCHDIR}/pub.ssl \ 2>${QUIETREDIR} || true if ! [ -r pub.ssl ]; then echo "failed." return 1 fi if ! [ `${SHA256} -q pub.ssl` = ${KEYPRINT} ]; then echo "key has incorrect hash." rm -f pub.ssl return 1 fi echo "done." } # Fetch metadata signature, aka "tag". fetch_tag () { echo -n "Fetching metadata signature " echo ${NDEBUG} "for ${RELNUM} from ${SERVERNAME}... " rm -f latest.ssl fetch ${QUIETFLAG} http://${SERVERNAME}/${FETCHDIR}/latest.ssl \ 2>${QUIETREDIR} || true if ! [ -r latest.ssl ]; then echo "failed." return 1 fi openssl rsautl -pubin -inkey pub.ssl -verify \ < latest.ssl > tag.new 2>${QUIETREDIR} || true rm latest.ssl if ! [ `wc -l < tag.new` = 1 ] || ! grep -qE \ "^freebsd-update\|${ARCH}\|${RELNUM}\|[0-9]+\|[0-9a-f]{64}\|[0-9]{10}" \ tag.new; then echo "invalid signature." return 1 fi echo "done." RELPATCHNUM=`cut -f 4 -d '|' < tag.new` TINDEXHASH=`cut -f 5 -d '|' < tag.new` EOLTIME=`cut -f 6 -d '|' < tag.new` } # Sanity-check the patch number in a tag, to make sure that we're not # going to "update" backwards and to prevent replay attacks. fetch_tagsanity () { # Check that we're not going to move from -pX to -pY with Y < X. RELPX=`uname -r | sed -E 's,.*-,,'` if echo ${RELPX} | grep -qE '^p[0-9]+$'; then RELPX=`echo ${RELPX} | cut -c 2-` else RELPX=0 fi if [ "${RELPATCHNUM}" -lt "${RELPX}" ]; then echo echo -n "Files on mirror (${RELNUM}-p${RELPATCHNUM})" echo " appear older than what" echo "we are currently running (`uname -r`)!" echo "Cowardly refusing to proceed any further." 
return 1 fi # If "tag" exists and corresponds to ${RELNUM}, make sure that # it contains a patch number <= RELPATCHNUM, in order to protect # against rollback (replay) attacks. if [ -f tag ] && grep -qE \ "^freebsd-update\|${ARCH}\|${RELNUM}\|[0-9]+\|[0-9a-f]{64}\|[0-9]{10}" \ tag; then LASTRELPATCHNUM=`cut -f 4 -d '|' < tag` if [ "${RELPATCHNUM}" -lt "${LASTRELPATCHNUM}" ]; then echo echo -n "Files on mirror (${RELNUM}-p${RELPATCHNUM})" echo " are older than the" echo -n "most recently seen updates" echo " (${RELNUM}-p${LASTRELPATCHNUM})." echo "Cowardly refusing to proceed any further." return 1 fi fi } # Fetch metadata index file fetch_metadata_index () { echo ${NDEBUG} "Fetching metadata index... " rm -f ${TINDEXHASH} fetch ${QUIETFLAG} http://${SERVERNAME}/${FETCHDIR}/t/${TINDEXHASH} 2>${QUIETREDIR} if ! [ -f ${TINDEXHASH} ]; then echo "failed." return 1 fi if [ `${SHA256} -q ${TINDEXHASH}` != ${TINDEXHASH} ]; then echo "update metadata index corrupt." return 1 fi echo "done." } # Print an error message about signed metadata being bogus. fetch_metadata_bogus () { echo echo "The update metadata$1 is correctly signed, but" echo "failed an integrity check." echo "Cowardly refusing to proceed any further." return 1 } # Construct tINDEX.new by merging the lines named in $1 from ${TINDEXHASH} # with the lines not named in $@ from tINDEX.present (if that file exists). fetch_metadata_index_merge () { for METAFILE in $@; do if [ `grep -E "^${METAFILE}\|" ${TINDEXHASH} | wc -l` \ -ne 1 ]; then fetch_metadata_bogus " index" return 1 fi grep -E "${METAFILE}\|" ${TINDEXHASH} done | sort > tINDEX.wanted if [ -f tINDEX.present ]; then join -t '|' -v 2 tINDEX.wanted tINDEX.present | sort -m - tINDEX.wanted > tINDEX.new rm tINDEX.wanted else mv tINDEX.wanted tINDEX.new fi } # Sanity check all the lines of tINDEX.new. Even if more metadata lines # are added by future versions of the server, this won't cause problems, # since the only lines which appear in tINDEX.new are the ones which we # specifically grepped out of ${TINDEXHASH}. fetch_metadata_index_sanity () { if grep -qvE '^[0-9A-Z.-]+\|[0-9a-f]{64}$' tINDEX.new; then fetch_metadata_bogus " index" return 1 fi } # Sanity check the metadata file $1. fetch_metadata_sanity () { # Some aliases to save space later: ${P} is a character which can # appear in a path; ${M} is the four numeric metadata fields; and # ${H} is a sha256 hash. P="[-+./:=,%@_[~[:alnum:]]" M="[0-9]+\|[0-9]+\|[0-9]+\|[0-9]+" H="[0-9a-f]{64}" # Check that the first four fields make sense. if gunzip -c < files/$1.gz | grep -qvE "^[a-z]+\|[0-9a-z-]+\|${P}+\|[fdL-]\|"; then fetch_metadata_bogus "" return 1 fi # Remove the first three fields. gunzip -c < files/$1.gz | cut -f 4- -d '|' > sanitycheck.tmp # Sanity check entries with type 'f' if grep -E '^f' sanitycheck.tmp | grep -qvE "^f\|${M}\|${H}\|${P}*\$"; then fetch_metadata_bogus "" return 1 fi # Sanity check entries with type 'd' if grep -E '^d' sanitycheck.tmp | grep -qvE "^d\|${M}\|\|\$"; then fetch_metadata_bogus "" return 1 fi # Sanity check entries with type 'L' if grep -E '^L' sanitycheck.tmp | grep -qvE "^L\|${M}\|${P}*\|\$"; then fetch_metadata_bogus "" return 1 fi # Sanity check entries with type '-' if grep -E '^-' sanitycheck.tmp | grep -qvE "^-\|\|\|\|\|\|"; then fetch_metadata_bogus "" return 1 fi # Clean up rm sanitycheck.tmp } # Fetch the metadata index and metadata files listed in $@, # taking advantage of metadata patches where possible. 
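# As implemented below, metadata patches are named ${oldhash}-${newhash}.gz
# and fetched from ${FETCHDIR}/tp/ on the server; any metadata file which
# cannot be produced by patching a file we already have is fetched whole
# from ${FETCHDIR}/m/ instead.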
fetch_metadata () { fetch_metadata_index || return 1 fetch_metadata_index_merge $@ || return 1 fetch_metadata_index_sanity || return 1 # Generate a list of wanted metadata patches join -t '|' -o 1.2,2.2 tINDEX.present tINDEX.new | fetch_make_patchlist > patchlist if [ -s patchlist ]; then # Attempt to fetch metadata patches echo -n "Fetching `wc -l < patchlist | tr -d ' '` " echo ${NDEBUG} "metadata patches.${DDSTATS}" tr '|' '-' < patchlist | lam -s "${FETCHDIR}/tp/" - -s ".gz" | xargs ${XARGST} ${PHTTPGET} ${SERVERNAME} \ 2>${STATSREDIR} | fetch_progress echo "done." # Attempt to apply metadata patches echo -n "Applying metadata patches... " tr '|' ' ' < patchlist | while read X Y; do if [ ! -f "${X}-${Y}.gz" ]; then continue; fi gunzip -c < ${X}-${Y}.gz > diff gunzip -c < files/${X}.gz > diff-OLD # Figure out which lines are being added and removed grep -E '^-' diff | cut -c 2- | while read PREFIX; do look "${PREFIX}" diff-OLD done | sort > diff-rm grep -E '^\+' diff | cut -c 2- > diff-add # Generate the new file comm -23 diff-OLD diff-rm | sort - diff-add > diff-NEW if [ `${SHA256} -q diff-NEW` = ${Y} ]; then mv diff-NEW files/${Y} gzip -n files/${Y} else mv diff-NEW ${Y}.bad fi rm -f ${X}-${Y}.gz diff rm -f diff-OLD diff-NEW diff-add diff-rm done 2>${QUIETREDIR} echo "done." fi # Update metadata without patches cut -f 2 -d '|' < tINDEX.new | while read Y; do if [ ! -f "files/${Y}.gz" ]; then echo ${Y}; fi done | sort -u > filelist if [ -s filelist ]; then echo -n "Fetching `wc -l < filelist | tr -d ' '` " echo ${NDEBUG} "metadata files... " lam -s "${FETCHDIR}/m/" - -s ".gz" < filelist | xargs ${XARGST} ${PHTTPGET} ${SERVERNAME} \ 2>${QUIETREDIR} while read Y; do if ! [ -f ${Y}.gz ]; then echo "failed." return 1 fi if [ `gunzip -c < ${Y}.gz | ${SHA256} -q` = ${Y} ]; then mv ${Y}.gz files/${Y}.gz else echo "metadata is corrupt." return 1 fi done < filelist echo "done." fi # Sanity-check the metadata files. cut -f 2 -d '|' tINDEX.new > filelist while read X; do fetch_metadata_sanity ${X} || return 1 done < filelist # Remove files which are no longer needed cut -f 2 -d '|' tINDEX.present | sort > oldfiles cut -f 2 -d '|' tINDEX.new | sort | comm -13 - oldfiles | lam -s "files/" - -s ".gz" | xargs rm -f rm patchlist filelist oldfiles rm ${TINDEXHASH} # We're done! mv tINDEX.new tINDEX.present mv tag.new tag return 0 } # Extract a subset of a downloaded metadata file containing only the parts # which are listed in COMPONENTS. fetch_filter_metadata_components () { METAHASH=`look "$1|" tINDEX.present | cut -f 2 -d '|'` gunzip -c < files/${METAHASH}.gz > $1.all # Fish out the lines belonging to components we care about. for C in ${COMPONENTS}; do look "`echo ${C} | tr '/' '|'`|" $1.all done > $1 # Remove temporary file. rm $1.all } # Generate a filtered version of the metadata file $1 from the downloaded # file, by fishing out the lines corresponding to components we're trying # to keep updated, and then removing lines corresponding to paths we want # to ignore. fetch_filter_metadata () { # Fish out the lines belonging to components we care about. fetch_filter_metadata_components $1 # Canonicalize directory names by removing any trailing / in # order to avoid listing directories multiple times if they # belong to multiple components. Turning "/" into "" doesn't # matter, since we add a leading "/" when we use paths later. cut -f 3- -d '|' $1 | sed -e 's,/|d|,|d|,' | sed -e 's,/|-|,|-|,' | sort -u > $1.tmp # Figure out which lines to ignore and remove them. 
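	# (For example, a hypothetical ${IGNOREPATHS} entry of "/usr/src"
	# would drop every line whose path begins with /usr/src; each entry
	# is applied as a regular expression anchored at the start of the
	# path.)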
for X in ${IGNOREPATHS}; do grep -E "^${X}" $1.tmp done | sort -u | comm -13 - $1.tmp > $1 # Remove temporary files. rm $1.tmp } # Filter the metadata file $1 by adding lines with "/boot/$2" # replaced by ${KERNELDIR} (which is `sysctl -n kern.bootfile` minus the # trailing "/kernel"); and if "/boot/$2" does not exist, remove # the original lines which start with that. # Put another way: Deal with the fact that the FOO kernel is sometimes # installed in /boot/FOO/ and is sometimes installed elsewhere. fetch_filter_kernel_names () { grep ^/boot/$2 $1 | sed -e "s,/boot/$2,${KERNELDIR},g" | sort - $1 > $1.tmp mv $1.tmp $1 if ! [ -d /boot/$2 ]; then grep -v ^/boot/$2 $1 > $1.tmp mv $1.tmp $1 fi } # For all paths appearing in $1 or $3, inspect the system # and generate $2 describing what is currently installed. fetch_inspect_system () { # No errors yet... rm -f .err # Tell the user why his disk is suddenly making lots of noise echo -n "Inspecting system... " # Generate list of files to inspect cat $1 $3 | cut -f 1 -d '|' | sort -u > filelist # Examine each file and output lines of the form # /path/to/file|type|device-inum|user|group|perm|flags|value # sorted by device and inode number. while read F; do # If the symlink/file/directory does not exist, record this. if ! [ -e ${BASEDIR}/${F} ]; then echo "${F}|-||||||" continue fi if ! [ -r ${BASEDIR}/${F} ]; then echo "Cannot read file: ${BASEDIR}/${F}" \ >/dev/stderr touch .err return 1 fi # Otherwise, output an index line. if [ -L ${BASEDIR}/${F} ]; then echo -n "${F}|L|" stat -n -f '%d-%i|%u|%g|%Mp%Lp|%Of|' ${BASEDIR}/${F}; readlink ${BASEDIR}/${F}; elif [ -f ${BASEDIR}/${F} ]; then echo -n "${F}|f|" stat -n -f '%d-%i|%u|%g|%Mp%Lp|%Of|' ${BASEDIR}/${F}; sha256 -q ${BASEDIR}/${F}; elif [ -d ${BASEDIR}/${F} ]; then echo -n "${F}|d|" stat -f '%d-%i|%u|%g|%Mp%Lp|%Of|' ${BASEDIR}/${F}; else echo "Unknown file type: ${BASEDIR}/${F}" \ >/dev/stderr touch .err return 1 fi done < filelist | sort -k 3,3 -t '|' > $2.tmp rm filelist # Check if an error occurred during system inspection if [ -f .err ]; then return 1 fi # Convert to the form # /path/to/file|type|user|group|perm|flags|value|hlink # by resolving identical device and inode numbers into hard links. cut -f 1,3 -d '|' $2.tmp | sort -k 1,1 -t '|' | sort -s -u -k 2,2 -t '|' | join -1 2 -2 3 -t '|' - $2.tmp | awk -F \| -v OFS=\| \ '{ if (($2 == $3) || ($4 == "-")) print $3,$4,$5,$6,$7,$8,$9,"" else print $3,$4,$5,$6,$7,$8,$9,$2 }' | sort > $2 rm $2.tmp # We're finished looking around echo "done." } # For any paths matching ${MERGECHANGES}, compare $1 and $2 and find any # files which differ; generate $3 containing these paths and the old hashes. fetch_filter_mergechanges () { # Pull out the paths and hashes of the files matching ${MERGECHANGES}. for F in $1 $2; do for X in ${MERGECHANGES}; do grep -E "^${X}" ${F} done | cut -f 1,2,7 -d '|' | sort > ${F}-values done # Any line in $2-values which doesn't appear in $1-values and is a # file means that we should list the path in $3. comm -13 $1-values $2-values | fgrep '|f|' | cut -f 1 -d '|' > $2-paths # For each path, pull out one (and only one!) entry from $1-values. # Note that we cannot distinguish which "old" version the user made # changes to; but hopefully any changes which occur due to security # updates will exist in both the "new" version and the version which # the user has installed, so the merging will still work. 
	while read X; do
		look "${X}|" $1-values | head -1
	done < $2-paths > $3

	# Clean up
	rm $1-values $2-values $2-paths
}

# For any paths matching ${UPDATEIFUNMODIFIED}, remove lines from $[123]
# which correspond to lines in $2 with hashes not matching $1 or $3, unless
# the paths are listed in $4.  For entries in $2 marked "not present"
# (aka. type -), remove lines from $[123] unless there is a corresponding
# entry in $1.
fetch_filter_unmodified_notpresent () {
	# Figure out which lines of $1 and $3 correspond to bits which
	# should only be updated if they haven't changed, and fish out
	# the (path, type, value) tuples.
	# NOTE: We don't consider a file to be "modified" if it matches
	# the hash from $3.
	for X in ${UPDATEIFUNMODIFIED}; do
		grep -E "^${X}" $1
		grep -E "^${X}" $3
	done |
	    cut -f 1,2,7 -d '|' |
	    sort > $1-values

	# Do the same for $2.
	for X in ${UPDATEIFUNMODIFIED}; do
		grep -E "^${X}" $2
	done |
	    cut -f 1,2,7 -d '|' |
	    sort > $2-values

	# Any entry in $2-values which is not in $1-values corresponds to
	# a path which we need to remove from $1, $2, and $3, unless that
	# path appears in $4.
	comm -13 $1-values $2-values |
	    sort -t '|' -k 1,1 > mlines.tmp
	cut -f 1 -d '|' $4 |
	    sort |
	    join -v 2 -t '|' - mlines.tmp |
	    sort > mlines
	rm $1-values $2-values mlines.tmp

	# Any lines in $2 which are not in $1 AND are "not present" lines
	# also belong in mlines.
	comm -13 $1 $2 |
	    cut -f 1,2,7 -d '|' |
	    fgrep '|-|' >> mlines

	# Remove lines from $1, $2, and $3
	for X in $1 $2 $3; do
		sort -t '|' -k 1,1 ${X} > ${X}.tmp
		cut -f 1 -d '|' < mlines |
		    sort |
		    join -v 2 -t '|' - ${X}.tmp |
		    sort > ${X}
		rm ${X}.tmp
	done

	# Store a list of the modified files, for future reference
	fgrep -v '|-|' mlines |
	    cut -f 1 -d '|' > modifiedfiles
	rm mlines
}

# For each entry in $1 of type -, remove any corresponding
# entry from $2 if ${ALLOWADD} != "yes".  Remove all entries
# of type - from $1.
fetch_filter_allowadd () {
	cut -f 1,2 -d '|' < $1 |
	    fgrep '|-' |
	    cut -f 1 -d '|' > filesnotpresent

	if [ ${ALLOWADD} != "yes" ]; then
		sort < $2 |
		    join -v 1 -t '|' - filesnotpresent |
		    sort > $2.tmp
		mv $2.tmp $2
	fi

	sort < $1 |
	    join -v 1 -t '|' - filesnotpresent |
	    sort > $1.tmp
	mv $1.tmp $1

	rm filesnotpresent
}

# If ${ALLOWDELETE} != "yes", then remove any entries from $1
# which don't correspond to entries in $2.
fetch_filter_allowdelete () {
	# Produce ${PATH}|${TYPE} lists
	for X in $1 $2; do
		cut -f 1-2 -d '|' < ${X} |
		    sort -u > ${X}.nodes
	done

	# Figure out which lines need to be removed from $1.
	if [ ${ALLOWDELETE} != "yes" ]; then
		comm -23 $1.nodes $2.nodes > $1.badnodes
	else
		: > $1.badnodes
	fi

	# Remove the relevant lines from $1
	while read X; do
		look "${X}|" $1
	done < $1.badnodes |
	    comm -13 - $1 > $1.tmp
	mv $1.tmp $1

	rm $1.badnodes $1.nodes $2.nodes
}

# If ${KEEPMODIFIEDMETADATA} == "yes", then for each entry in $2
# with metadata not matching any entry in $1, replace the corresponding
# line of $3 with one having the same metadata as the entry in $2.
fetch_filter_modified_metadata () {
	# Fish out the metadata from $1 and $2
	for X in $1 $2; do
		cut -f 1-6 -d '|' < ${X} > ${X}.metadata
	done

	# Find the metadata we need to keep
	if [ ${KEEPMODIFIEDMETADATA} = "yes" ]; then
		comm -13 $1.metadata $2.metadata > keepmeta
	else
		: > keepmeta
	fi

	# Extract the lines which we need to remove from $3, and
	# construct the lines which we need to add to $3.
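	# Each replacement line keeps fields 1-6 (path, type, owner, group,
	# permissions, flags) from the locally-modified entry in keepmeta
	# and appends fields 7 and up (hash or link target) from the
	# matching line of $3, so the new contents are installed with the
	# metadata the system already has.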
: > $3.remove : > $3.add while read LINE; do NODE=`echo "${LINE}" | cut -f 1-2 -d '|'` look "${NODE}|" $3 >> $3.remove look "${NODE}|" $3 | cut -f 7- -d '|' | lam -s "${LINE}|" - >> $3.add done < keepmeta # Remove the specified lines and add the new lines. sort $3.remove | comm -13 - $3 | sort -u - $3.add > $3.tmp mv $3.tmp $3 rm keepmeta $1.metadata $2.metadata $3.add $3.remove } # Remove lines from $1 and $2 which are identical; # no need to update a file if it isn't changing. fetch_filter_uptodate () { comm -23 $1 $2 > $1.tmp comm -13 $1 $2 > $2.tmp mv $1.tmp $1 mv $2.tmp $2 } # Fetch any "clean" old versions of files we need for merging changes. fetch_files_premerge () { # We only need to do anything if $1 is non-empty. if [ -s $1 ]; then # Tell the user what we're doing echo -n "Fetching files from ${OLDRELNUM} for merging... " # List of files wanted fgrep '|f|' < $1 | cut -f 3 -d '|' | sort -u > files.wanted # Only fetch the files we don't already have while read Y; do if [ ! -f "files/${Y}.gz" ]; then echo ${Y}; fi done < files.wanted > filelist # Actually fetch them lam -s "${OLDFETCHDIR}/f/" - -s ".gz" < filelist | xargs ${XARGST} ${PHTTPGET} ${SERVERNAME} \ 2>${QUIETREDIR} # Make sure we got them all, and move them into /files/ while read Y; do if ! [ -f ${Y}.gz ]; then echo "failed." return 1 fi if [ `gunzip -c < ${Y}.gz | ${SHA256} -q` = ${Y} ]; then mv ${Y}.gz files/${Y}.gz else echo "${Y} has incorrect hash." return 1 fi done < filelist echo "done." # Clean up rm filelist files.wanted fi } # Prepare to fetch files: Generate a list of the files we need, # copy the unmodified files we have into /files/, and generate # a list of patches to download. fetch_files_prepare () { # Tell the user why his disk is suddenly making lots of noise echo -n "Preparing to download files... " # Reduce indices to ${PATH}|${HASH} pairs for X in $1 $2 $3; do cut -f 1,2,7 -d '|' < ${X} | fgrep '|f|' | cut -f 1,3 -d '|' | sort > ${X}.hashes done # List of files wanted cut -f 2 -d '|' < $3.hashes | sort -u | while read HASH; do if ! [ -f files/${HASH}.gz ]; then echo ${HASH} fi done > files.wanted # Generate a list of unmodified files comm -12 $1.hashes $2.hashes | sort -k 1,1 -t '|' > unmodified.files # Copy all files into /files/. We only need the unmodified files # for use in patching; but we'll want all of them if the user asks # to rollback the updates later. while read LINE; do F=`echo "${LINE}" | cut -f 1 -d '|'` HASH=`echo "${LINE}" | cut -f 2 -d '|'` # Skip files we already have. if [ -f files/${HASH}.gz ]; then continue fi # Make sure the file hasn't changed. cp "${BASEDIR}/${F}" tmpfile if [ `sha256 -q tmpfile` != ${HASH} ]; then echo echo "File changed while FreeBSD Update running: ${F}" return 1 fi # Place the file into storage. gzip -c < tmpfile > files/${HASH}.gz rm tmpfile done < $2.hashes # Produce a list of patches to download sort -k 1,1 -t '|' $3.hashes | join -t '|' -o 2.2,1.2 - unmodified.files | fetch_make_patchlist > patchlist # Garbage collect rm unmodified.files $1.hashes $2.hashes $3.hashes # We don't need the list of possible old files any more. rm $1 # We're finished making noise echo "done." } # Fetch files. fetch_files () { # Attempt to fetch patches if [ -s patchlist ]; then echo -n "Fetching `wc -l < patchlist | tr -d ' '` " echo ${NDEBUG} "patches.${DDSTATS}" tr '|' '-' < patchlist | lam -s "${PATCHDIR}/" - | xargs ${XARGST} ${PHTTPGET} ${SERVERNAME} \ 2>${STATSREDIR} | fetch_progress echo "done." # Attempt to apply patches echo -n "Applying patches... 
" tr '|' ' ' < patchlist | while read X Y; do if [ ! -f "${X}-${Y}" ]; then continue; fi gunzip -c < files/${X}.gz > OLD bspatch OLD NEW ${X}-${Y} if [ `${SHA256} -q NEW` = ${Y} ]; then mv NEW files/${Y} gzip -n files/${Y} fi rm -f diff OLD NEW ${X}-${Y} done 2>${QUIETREDIR} echo "done." fi # Download files which couldn't be generate via patching while read Y; do if [ ! -f "files/${Y}.gz" ]; then echo ${Y}; fi done < files.wanted > filelist if [ -s filelist ]; then echo -n "Fetching `wc -l < filelist | tr -d ' '` " echo ${NDEBUG} "files... " lam -s "${FETCHDIR}/f/" - -s ".gz" < filelist | xargs ${XARGST} ${PHTTPGET} ${SERVERNAME} \ 2>${STATSREDIR} | fetch_progress while read Y; do if ! [ -f ${Y}.gz ]; then echo "failed." return 1 fi if [ `gunzip -c < ${Y}.gz | ${SHA256} -q` = ${Y} ]; then mv ${Y}.gz files/${Y}.gz else echo "${Y} has incorrect hash." return 1 fi done < filelist echo "done." fi # Clean up rm files.wanted filelist patchlist } # Create and populate install manifest directory; and report what updates # are available. fetch_create_manifest () { # If we have an existing install manifest, nuke it. if [ -L "${BDHASH}-install" ]; then rm -r ${BDHASH}-install/ rm ${BDHASH}-install fi # Report to the user if any updates were avoided due to local changes if [ -s modifiedfiles ]; then cat - modifiedfiles <<- EOF | ${PAGER} The following files are affected by updates. No changes have been downloaded, however, because the files have been modified locally: EOF fi rm modifiedfiles # If no files will be updated, tell the user and exit if ! [ -s INDEX-PRESENT ] && ! [ -s INDEX-NEW ]; then rm INDEX-PRESENT INDEX-NEW echo echo -n "No updates needed to update system to " echo "${RELNUM}-p${RELPATCHNUM}." return fi # Divide files into (a) removed files, (b) added files, and # (c) updated files. cut -f 1 -d '|' < INDEX-PRESENT | sort > INDEX-PRESENT.flist cut -f 1 -d '|' < INDEX-NEW | sort > INDEX-NEW.flist comm -23 INDEX-PRESENT.flist INDEX-NEW.flist > files.removed comm -13 INDEX-PRESENT.flist INDEX-NEW.flist > files.added comm -12 INDEX-PRESENT.flist INDEX-NEW.flist > files.updated rm INDEX-PRESENT.flist INDEX-NEW.flist # Report removed files, if any if [ -s files.removed ]; then cat - files.removed <<- EOF | ${PAGER} The following files will be removed as part of updating to ${RELNUM}-p${RELPATCHNUM}: EOF fi rm files.removed # Report added files, if any if [ -s files.added ]; then cat - files.added <<- EOF | ${PAGER} The following files will be added as part of updating to ${RELNUM}-p${RELPATCHNUM}: EOF fi rm files.added # Report updated files, if any if [ -s files.updated ]; then cat - files.updated <<- EOF | ${PAGER} The following files will be updated as part of updating to ${RELNUM}-p${RELPATCHNUM}: EOF fi rm files.updated # Create a directory for the install manifest. MDIR=`mktemp -d install.XXXXXX` || return 1 # Populate it mv INDEX-PRESENT ${MDIR}/INDEX-OLD mv INDEX-NEW ${MDIR}/INDEX-NEW # Link it into place ln -s ${MDIR} ${BDHASH}-install } # Warn about any upcoming EoL fetch_warn_eol () { # What's the current time? NOWTIME=`date "+%s"` # When did we last warn about the EoL date? if [ -f lasteolwarn ]; then LASTWARN=`cat lasteolwarn` else LASTWARN=`expr ${NOWTIME} - 63072000` fi # If the EoL time is past, warn. if [ ${EOLTIME} -lt ${NOWTIME} ]; then echo cat <<-EOF WARNING: `uname -sr` HAS PASSED ITS END-OF-LIFE DATE. Any security issues discovered after `date -r ${EOLTIME}` will not have been corrected. 
EOF return 1 fi # Figure out how long it has been since we last warned about the # upcoming EoL, and how much longer we have left. SINCEWARN=`expr ${NOWTIME} - ${LASTWARN}` TIMELEFT=`expr ${EOLTIME} - ${NOWTIME}` # Don't warn if the EoL is more than 3 months away if [ ${TIMELEFT} -gt 7884000 ]; then return 0 fi # Don't warn if the time remaining is more than 3 times the time # since the last warning. if [ ${TIMELEFT} -gt `expr ${SINCEWARN} \* 3` ]; then return 0 fi # Figure out what time units to use. if [ ${TIMELEFT} -lt 604800 ]; then UNIT="day" SIZE=86400 elif [ ${TIMELEFT} -lt 2678400 ]; then UNIT="week" SIZE=604800 else UNIT="month" SIZE=2678400 fi # Compute the right number of units NUM=`expr ${TIMELEFT} / ${SIZE}` if [ ${NUM} != 1 ]; then UNIT="${UNIT}s" fi # Print the warning echo cat <<-EOF WARNING: `uname -sr` is approaching its End-of-Life date. It is strongly recommended that you upgrade to a newer release within the next ${NUM} ${UNIT}. EOF # Update the stored time of last warning echo ${NOWTIME} > lasteolwarn } # Do the actual work involved in "fetch" / "cron". fetch_run () { workdir_init || return 1 # Prepare the mirror list. fetch_pick_server_init && fetch_pick_server # Try to fetch the public key until we run out of servers. while ! fetch_key; do fetch_pick_server || return 1 done # Try to fetch the metadata index signature ("tag") until we run # out of available servers; and sanity check the downloaded tag. while ! fetch_tag; do fetch_pick_server || return 1 done fetch_tagsanity || return 1 # Fetch the latest INDEX-NEW and INDEX-OLD files. fetch_metadata INDEX-NEW INDEX-OLD || return 1 # Generate filtered INDEX-NEW and INDEX-OLD files containing only # the lines which (a) belong to components we care about, and (b) # don't correspond to paths we're explicitly ignoring. fetch_filter_metadata INDEX-NEW || return 1 fetch_filter_metadata INDEX-OLD || return 1 # Translate /boot/${KERNCONF} into ${KERNELDIR} fetch_filter_kernel_names INDEX-NEW ${KERNCONF} fetch_filter_kernel_names INDEX-OLD ${KERNCONF} # For all paths appearing in INDEX-OLD or INDEX-NEW, inspect the # system and generate an INDEX-PRESENT file. fetch_inspect_system INDEX-OLD INDEX-PRESENT INDEX-NEW || return 1 # Based on ${UPDATEIFUNMODIFIED}, remove lines from INDEX-* which # correspond to lines in INDEX-PRESENT with hashes not appearing # in INDEX-OLD or INDEX-NEW. Also remove lines where the entry in # INDEX-PRESENT has type - and there isn't a corresponding entry in # INDEX-OLD with type -. fetch_filter_unmodified_notpresent \ INDEX-OLD INDEX-PRESENT INDEX-NEW /dev/null # For each entry in INDEX-PRESENT of type -, remove any corresponding # entry from INDEX-NEW if ${ALLOWADD} != "yes". Remove all entries # of type - from INDEX-PRESENT. fetch_filter_allowadd INDEX-PRESENT INDEX-NEW # If ${ALLOWDELETE} != "yes", then remove any entries from # INDEX-PRESENT which don't correspond to entries in INDEX-NEW. fetch_filter_allowdelete INDEX-PRESENT INDEX-NEW # If ${KEEPMODIFIEDMETADATA} == "yes", then for each entry in # INDEX-PRESENT with metadata not matching any entry in INDEX-OLD, # replace the corresponding line of INDEX-NEW with one having the # same metadata as the entry in INDEX-PRESENT. fetch_filter_modified_metadata INDEX-OLD INDEX-PRESENT INDEX-NEW # Remove lines from INDEX-PRESENT and INDEX-NEW which are identical; # no need to update a file if it isn't changing. 
fetch_filter_uptodate INDEX-PRESENT INDEX-NEW # Prepare to fetch files: Generate a list of the files we need, # copy the unmodified files we have into /files/, and generate # a list of patches to download. fetch_files_prepare INDEX-OLD INDEX-PRESENT INDEX-NEW || return 1 # Fetch files. fetch_files || return 1 # Create and populate install manifest directory; and report what # updates are available. fetch_create_manifest || return 1 # Warn about any upcoming EoL fetch_warn_eol || return 1 } # If StrictComponents is not "yes", generate a new components list # with only the components which appear to be installed. upgrade_guess_components () { if [ "${STRICTCOMPONENTS}" = "no" ]; then # Generate filtered INDEX-ALL with only the components listed # in COMPONENTS. fetch_filter_metadata_components $1 || return 1 # Tell the user why his disk is suddenly making lots of noise echo -n "Inspecting system... " # Look at the files on disk, and assume that a component is # supposed to be present if it is more than half-present. cut -f 1-3 -d '|' < INDEX-ALL | tr '|' ' ' | while read C S F; do if [ -e ${BASEDIR}/${F} ]; then echo "+ ${C}|${S}" fi echo "= ${C}|${S}" done | sort | uniq -c | sed -E 's,^ +,,' > compfreq grep ' = ' compfreq | cut -f 1,3 -d ' ' | sort -k 2,2 -t ' ' > compfreq.total grep ' + ' compfreq | cut -f 1,3 -d ' ' | sort -k 2,2 -t ' ' > compfreq.present join -t ' ' -1 2 -2 2 compfreq.present compfreq.total | while read S P T; do if [ ${T} -ne 0 -a ${P} -gt `expr ${T} / 2` ]; then echo ${S} fi done > comp.present cut -f 2 -d ' ' < compfreq.total > comp.total rm INDEX-ALL compfreq compfreq.total compfreq.present # We're done making noise. echo "done." # Sometimes the kernel isn't installed where INDEX-ALL # thinks that it should be: In particular, it is often in # /boot/kernel instead of /boot/GENERIC or /boot/SMP. To # deal with this, if "kernel|X" is listed in comp.total # (i.e., is a component which would be upgraded if it is # found to be present) we will add it to comp.present. # If "kernel|" is in comp.total but "kernel|X" is # not, we print a warning -- the user is running a kernel # which isn't part of the release. KCOMP=`echo ${KERNCONF} | tr 'A-Z' 'a-z'` grep -E "^kernel\|${KCOMP}\$" comp.total >> comp.present if grep -qE "^kernel\|" comp.total && ! grep -qE "^kernel\|${KCOMP}\$" comp.total; then cat <<-EOF WARNING: This system is running a "${KCOMP}" kernel, which is not a kernel configuration distributed as part of FreeBSD ${RELNUM}. This kernel will not be updated: you MUST update the kernel manually before running "$0 install". EOF fi # Re-sort the list of installed components and generate # the list of non-installed components. sort -u < comp.present > comp.present.tmp mv comp.present.tmp comp.present comm -13 comp.present comp.total > comp.absent # Ask the user to confirm that what we have is correct. To # reduce user confusion, translate "X|Y" back to "X/Y" (as # subcomponents must be listed in the configuration file). echo echo -n "The following components of FreeBSD " echo "seem to be installed:" tr '|' '/' < comp.present | fmt -72 echo echo -n "The following components of FreeBSD " echo "do not seem to be installed:" tr '|' '/' < comp.absent | fmt -72 echo continuep || return 1 echo # Suck the generated list of components into ${COMPONENTS}. # Note that comp.present.tmp is used due to issues with # pipelines and setting variables. 
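		# (In sh, each stage of a pipeline may run in a subshell,
		# so a "while read" loop fed directly by a pipe could not
		# modify COMPONENTS in this shell; reading from a temporary
		# file keeps the loop in the current shell.)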
COMPONENTS="" tr '|' '/' < comp.present > comp.present.tmp while read C; do COMPONENTS="${COMPONENTS} ${C}" done < comp.present.tmp # Delete temporary files rm comp.present comp.present.tmp comp.absent comp.total fi } # If StrictComponents is not "yes", COMPONENTS contains an entry # corresponding to the currently running kernel, and said kernel # does not exist in the new release, add "kernel/generic" to the # list of components. upgrade_guess_new_kernel () { if [ "${STRICTCOMPONENTS}" = "no" ]; then # Grab the unfiltered metadata file. METAHASH=`look "$1|" tINDEX.present | cut -f 2 -d '|'` gunzip -c < files/${METAHASH}.gz > $1.all # If "kernel/${KCOMP}" is in ${COMPONENTS} and that component # isn't in $1.all, we need to add kernel/generic. for C in ${COMPONENTS}; do if [ ${C} = "kernel/${KCOMP}" ] && ! grep -qE "^kernel\|${KCOMP}\|" $1.all; then COMPONENTS="${COMPONENTS} kernel/generic" NKERNCONF="GENERIC" cat <<-EOF WARNING: This system is running a "${KCOMP}" kernel, which is not a kernel configuration distributed as part of FreeBSD ${RELNUM}. As part of upgrading to FreeBSD ${RELNUM}, this kernel will be replaced with a "generic" kernel. EOF continuep || return 1 fi done # Don't need this any more... rm $1.all fi } # Convert INDEX-OLD (last release) and INDEX-ALL (new release) into # INDEX-OLD and INDEX-NEW files (in the sense of normal upgrades). upgrade_oldall_to_oldnew () { # For each ${F}|... which appears in INDEX-ALL but does not appear # in INDEX-OLD, add ${F}|-|||||| to INDEX-OLD. cut -f 1 -d '|' < $1 | sort -u > $1.paths cut -f 1 -d '|' < $2 | sort -u | comm -13 $1.paths - | lam - -s "|-||||||" | sort - $1 > $1.tmp mv $1.tmp $1 # Remove lines from INDEX-OLD which also appear in INDEX-ALL comm -23 $1 $2 > $1.tmp mv $1.tmp $1 # Remove lines from INDEX-ALL which have a file name not appearing # anywhere in INDEX-OLD (since these must be files which haven't # changed -- if they were new, there would be an entry of type "-"). cut -f 1 -d '|' < $1 | sort -u > $1.paths sort -k 1,1 -t '|' < $2 | join -t '|' - $1.paths | sort > $2.tmp rm $1.paths mv $2.tmp $2 # Rename INDEX-ALL to INDEX-NEW. mv $2 $3 } # Helper for upgrade_merge: Return zero true iff the two files differ only # in the contents of their RCS tags. samef () { X=`sed -E 's/\\$FreeBSD.*\\$/\$FreeBSD\$/' < $1 | ${SHA256}` Y=`sed -E 's/\\$FreeBSD.*\\$/\$FreeBSD\$/' < $2 | ${SHA256}` if [ $X = $Y ]; then return 0; else return 1; fi } # From the list of "old" files in $1, merge changes in $2 with those in $3, # and update $3 to reflect the hashes of merged files. upgrade_merge () { # We only need to do anything if $1 is non-empty. if [ -s $1 ]; then cut -f 1 -d '|' $1 | sort > $1-paths # Create staging area for merging files rm -rf merge/ while read F; do D=`dirname ${F}` mkdir -p merge/old/${D} mkdir -p merge/${OLDRELNUM}/${D} mkdir -p merge/${RELNUM}/${D} mkdir -p merge/new/${D} done < $1-paths # Copy in files while read F; do # Currently installed file V=`look "${F}|" $2 | cut -f 7 -d '|'` gunzip < files/${V}.gz > merge/old/${F} # Old release if look "${F}|" $1 | fgrep -q "|f|"; then V=`look "${F}|" $1 | cut -f 3 -d '|'` gunzip < files/${V}.gz \ > merge/${OLDRELNUM}/${F} fi # New release if look "${F}|" $3 | cut -f 1,2,7 -d '|' | fgrep -q "|f|"; then V=`look "${F}|" $3 | cut -f 7 -d '|'` gunzip < files/${V}.gz \ > merge/${RELNUM}/${F} fi done < $1-paths # Attempt to automatically merge changes echo -n "Attempting to automatically merge " echo -n "changes in files..." 
: > failed.merges while read F; do # If the file doesn't exist in the new release, # the result of "merging changes" is having the file # not exist. if ! [ -f merge/${RELNUM}/${F} ]; then continue fi # If the file didn't exist in the old release, we're # going to throw away the existing file and hope that # the version from the new release is what we want. if ! [ -f merge/${OLDRELNUM}/${F} ]; then cp merge/${RELNUM}/${F} merge/new/${F} continue fi # Some files need special treatment. case ${F} in /etc/spwd.db | /etc/pwd.db | /etc/login.conf.db) # Don't merge these -- we're rebuild them # after updates are installed. cp merge/old/${F} merge/new/${F} ;; *) if ! diff3 -E -m -L "current version" \ -L "${OLDRELNUM}" -L "${RELNUM}" \ merge/old/${F} \ merge/${OLDRELNUM}/${F} \ merge/${RELNUM}/${F} \ > merge/new/${F} 2>/dev/null; then echo ${F} >> failed.merges fi ;; esac done < $1-paths echo " done." # Ask the user to handle any files which didn't merge. while read F; do # If the installed file differs from the version in # the old release only due to RCS tag expansion # then just use the version in the new release. if samef merge/old/${F} merge/${OLDRELNUM}/${F}; then cp merge/${RELNUM}/${F} merge/new/${F} continue fi cat <<-EOF The following file could not be merged automatically: ${F} Press Enter to edit this file in ${EDITOR} and resolve the conflicts manually... EOF read dummy files/${V}.gz echo "${F}|${V}" fi done < $1-paths > newhashes # Pull lines out from $3 which need to be updated to # reflect merged files. while read F; do look "${F}|" $3 done < $1-paths > $3-oldlines # Update lines to reflect merged files join -t '|' -o 1.1,1.2,1.3,1.4,1.5,1.6,2.2,1.8 \ $3-oldlines newhashes > $3-newlines # Remove old lines from $3 and add new lines. sort $3-oldlines | comm -13 - $3 | sort - $3-newlines > $3.tmp mv $3.tmp $3 # Clean up rm $1-paths newhashes $3-oldlines $3-newlines rm -rf merge/ fi # We're done with merging files. rm $1 } # Do the work involved in fetching upgrades to a new release upgrade_run () { workdir_init || return 1 # Prepare the mirror list. fetch_pick_server_init && fetch_pick_server # Try to fetch the public key until we run out of servers. while ! fetch_key; do fetch_pick_server || return 1 done # Try to fetch the metadata index signature ("tag") until we run # out of available servers; and sanity check the downloaded tag. while ! fetch_tag; do fetch_pick_server || return 1 done fetch_tagsanity || return 1 # Fetch the INDEX-OLD and INDEX-ALL. fetch_metadata INDEX-OLD INDEX-ALL || return 1 # If StrictComponents is not "yes", generate a new components list # with only the components which appear to be installed. upgrade_guess_components INDEX-ALL || return 1 # Generate filtered INDEX-OLD and INDEX-ALL files containing only # the components we want and without anything marked as "Ignore". fetch_filter_metadata INDEX-OLD || return 1 fetch_filter_metadata INDEX-ALL || return 1 # Merge the INDEX-OLD and INDEX-ALL files into INDEX-OLD. sort INDEX-OLD INDEX-ALL > INDEX-OLD.tmp mv INDEX-OLD.tmp INDEX-OLD rm INDEX-ALL # Adjust variables for fetching files from the new release. OLDRELNUM=${RELNUM} RELNUM=${TARGETRELEASE} OLDFETCHDIR=${FETCHDIR} FETCHDIR=${RELNUM}/${ARCH} # Try to fetch the NEW metadata index signature ("tag") until we run # out of available servers; and sanity check the downloaded tag. while ! fetch_tag; do fetch_pick_server || return 1 done # Fetch the new INDEX-ALL. 
fetch_metadata INDEX-ALL || return 1 # If StrictComponents is not "yes", COMPONENTS contains an entry # corresponding to the currently running kernel, and said kernel # does not exist in the new release, add "kernel/generic" to the # list of components. upgrade_guess_new_kernel INDEX-ALL || return 1 # Filter INDEX-ALL to contain only the components we want and without # anything marked as "Ignore". fetch_filter_metadata INDEX-ALL || return 1 # Convert INDEX-OLD (last release) and INDEX-ALL (new release) into # INDEX-OLD and INDEX-NEW files (in the sense of normal upgrades). upgrade_oldall_to_oldnew INDEX-OLD INDEX-ALL INDEX-NEW # Translate /boot/${KERNCONF} or /boot/${NKERNCONF} into ${KERNELDIR} fetch_filter_kernel_names INDEX-NEW ${NKERNCONF} fetch_filter_kernel_names INDEX-OLD ${KERNCONF} # For all paths appearing in INDEX-OLD or INDEX-NEW, inspect the # system and generate an INDEX-PRESENT file. fetch_inspect_system INDEX-OLD INDEX-PRESENT INDEX-NEW || return 1 # Based on ${MERGECHANGES}, generate a file tomerge-old with the # paths and hashes of old versions of files to merge. fetch_filter_mergechanges INDEX-OLD INDEX-PRESENT tomerge-old # Based on ${UPDATEIFUNMODIFIED}, remove lines from INDEX-* which # correspond to lines in INDEX-PRESENT with hashes not appearing # in INDEX-OLD or INDEX-NEW. Also remove lines where the entry in # INDEX-PRESENT has type - and there isn't a corresponding entry in # INDEX-OLD with type -. fetch_filter_unmodified_notpresent \ INDEX-OLD INDEX-PRESENT INDEX-NEW tomerge-old # For each entry in INDEX-PRESENT of type -, remove any corresponding # entry from INDEX-NEW if ${ALLOWADD} != "yes". Remove all entries # of type - from INDEX-PRESENT. fetch_filter_allowadd INDEX-PRESENT INDEX-NEW # If ${ALLOWDELETE} != "yes", then remove any entries from # INDEX-PRESENT which don't correspond to entries in INDEX-NEW. fetch_filter_allowdelete INDEX-PRESENT INDEX-NEW # If ${KEEPMODIFIEDMETADATA} == "yes", then for each entry in # INDEX-PRESENT with metadata not matching any entry in INDEX-OLD, # replace the corresponding line of INDEX-NEW with one having the # same metadata as the entry in INDEX-PRESENT. fetch_filter_modified_metadata INDEX-OLD INDEX-PRESENT INDEX-NEW # Remove lines from INDEX-PRESENT and INDEX-NEW which are identical; # no need to update a file if it isn't changing. fetch_filter_uptodate INDEX-PRESENT INDEX-NEW # Fetch "clean" files from the old release for merging changes. fetch_files_premerge tomerge-old # Prepare to fetch files: Generate a list of the files we need, # copy the unmodified files we have into /files/, and generate # a list of patches to download. fetch_files_prepare INDEX-OLD INDEX-PRESENT INDEX-NEW || return 1 # Fetch patches from to-${RELNUM}/${ARCH}/bp/ PATCHDIR=to-${RELNUM}/${ARCH}/bp fetch_files || return 1 # Merge configuration file changes. upgrade_merge tomerge-old INDEX-PRESENT INDEX-NEW || return 1 # Create and populate install manifest directory; and report what # updates are available. fetch_create_manifest || return 1 # Leave a note behind to tell the "install" command that the kernel # needs to be installed before the world. touch ${BDHASH}-install/kernelfirst # Remind the user that they need to run "freebsd-update install" # to install the downloaded bits, in case they didn't RTFM. echo "To install the downloaded upgrades, run \"$0 install\"." } # Make sure that all the file hashes mentioned in $@ have corresponding # gzipped files stored in /files/. 
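# Each file is stored in the working directory as files/<sha256>.gz, where
# <sha256> is the hash of the uncompressed contents; a missing file here
# normally means that the "fetch" or "upgrade" step did not run to
# completion.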
install_verify () {
	# Generate a list of hashes
	cat $@ |
	    cut -f 2,7 -d '|' |
	    grep -E '^f' |
	    cut -f 2 -d '|' |
	    sort -u > filelist

	# Make sure all the hashes exist
	while read HASH; do
		if ! [ -f files/${HASH}.gz ]; then
			echo -n "Update files missing -- "
			echo "this should never happen."
			echo "Re-run '$0 fetch'."
			return 1
		fi
	done < filelist

	# Clean up
	rm filelist
}

# Remove the system immutable flag from files
install_unschg () {
	# Generate file list
	cat $@ |
	    cut -f 1 -d '|' > filelist

	# Remove flags
	while read F; do
		if ! [ -e ${BASEDIR}/${F} ]; then
			continue
		else
			echo ${BASEDIR}/${F}
		fi
	done < filelist | xargs chflags noschg || return 1

	# Clean up
	rm filelist
}

# Decide which directory name to use for kernel backups.
backup_kernel_finddir () {
	CNT=0
	while true ; do
		# If the pathname does not exist, it is OK to use that
		# name for the backup directory.
		if [ ! -e $BASEDIR/$BACKUPKERNELDIR ]; then
			return 0
		fi

		# If the directory does exist, we only use it if it has
		# our marker file.
		if [ -d $BASEDIR/$BACKUPKERNELDIR -a \
			-e $BASEDIR/$BACKUPKERNELDIR/.freebsd-update ]; then
			return 0
		fi

		# We could not use the current directory name, so add a
		# counter to the end and try again.
		CNT=$((CNT + 1))
		if [ $CNT -gt 9 ]; then
			echo "Could not find valid backup dir ($BASEDIR/$BACKUPKERNELDIR)"
			exit 1
		fi
		BACKUPKERNELDIR="`echo $BACKUPKERNELDIR | sed -Ee 's/[0-9]\$//'`"
		BACKUPKERNELDIR="${BACKUPKERNELDIR}${CNT}"
	done
}

# Back up the current kernel using hardlinks, if not disabled by the user.
# Since we delete all files in the directory used for previous backups,
# we create a marker file called ".freebsd-update" in the directory so
# we can determine on the next run that the directory was created by
# freebsd-update and we then do not accidentally remove user files in
# the unlikely case that the user has created a directory with a
# conflicting name.
backup_kernel () {
	# Only make a kernel backup if so configured.
	if [ $BACKUPKERNEL != yes ]; then
		return 0
	fi

	# Decide which directory name to use for kernel backups.
	backup_kernel_finddir

	# Remove old kernel backup files.  If $BACKUPKERNELDIR was
	# "not ours", backup_kernel_finddir would have exited, so
	# deleting the directory content is as safe as we can make it.
	if [ -d $BASEDIR/$BACKUPKERNELDIR ]; then
		rm -fr $BASEDIR/$BACKUPKERNELDIR
	fi

	# Create directories for backup.
	mkdir -p $BASEDIR/$BACKUPKERNELDIR
	mtree -cdn -p "${BASEDIR}/${KERNELDIR}" | \
	    mtree -Ue -p "${BASEDIR}/${BACKUPKERNELDIR}" > /dev/null

	# Mark the directory as having been created by freebsd-update.
	touch $BASEDIR/$BACKUPKERNELDIR/.freebsd-update
	if [ $? -ne 0 ]; then
		echo "Could not create kernel backup directory"
		exit 1
	fi

	# Disable pathname expansion to be sure *.symbols is not
	# expanded.
	set -f

	# Use find to ignore symbol files, unless disabled by the user.
	if [ $BACKUPKERNELSYMBOLFILES = yes ]; then
		FINDFILTER=""
	else
		FINDFILTER="-a ! -name *.debug -a ! -name *.symbols"
	fi

	# Back up all the kernel files using hardlinks.
	(cd ${BASEDIR}/${KERNELDIR} && find . -type f $FINDFILTER -exec \
	    cp -pl '{}' ${BASEDIR}/${BACKUPKERNELDIR}/'{}' \;)

	# Re-enable pathname expansion.
	set +f
}

# Install new files
install_from_index () {
	# First pass: Do everything apart from setting file flags.  We
	# can't set flags yet, because schg inhibits hard linking.
	sort -k 1,1 -t '|' $1 |
	    tr '|' ' ' |
	    while read FPATH TYPE OWNER GROUP PERM FLAGS HASH LINK; do
		case ${TYPE} in
		d)
			# Create a directory
			install -d -o ${OWNER} -g ${GROUP} \
			    -m ${PERM} ${BASEDIR}/${FPATH}
			;;
		f)
			if [ -z "${LINK}" ]; then
				# Create a file, without setting flags.
gunzip < files/${HASH}.gz > ${HASH} install -S -o ${OWNER} -g ${GROUP} \ -m ${PERM} ${HASH} ${BASEDIR}/${FPATH} rm ${HASH} else # Create a hard link. ln -f ${BASEDIR}/${LINK} ${BASEDIR}/${FPATH} fi ;; L) # Create a symlink ln -sfh ${HASH} ${BASEDIR}/${FPATH} ;; esac done # Perform a second pass, adding file flags. tr '|' ' ' < $1 | while read FPATH TYPE OWNER GROUP PERM FLAGS HASH LINK; do if [ ${TYPE} = "f" ] && ! [ ${FLAGS} = "0" ]; then chflags ${FLAGS} ${BASEDIR}/${FPATH} fi done } # Remove files which we want to delete install_delete () { # Generate list of new files cut -f 1 -d '|' < $2 | sort > newfiles # Generate subindex of old files we want to nuke sort -k 1,1 -t '|' $1 | join -t '|' -v 1 - newfiles | sort -r -k 1,1 -t '|' | cut -f 1,2 -d '|' | tr '|' ' ' > killfiles # Remove the offending bits while read FPATH TYPE; do case ${TYPE} in d) rmdir ${BASEDIR}/${FPATH} ;; f) rm ${BASEDIR}/${FPATH} ;; L) rm ${BASEDIR}/${FPATH} ;; esac done < killfiles # Clean up rm newfiles killfiles } # Install new files, delete old files, and update linker.hints install_files () { # If we haven't already dealt with the kernel, deal with it. if ! [ -f $1/kerneldone ]; then grep -E '^/boot/' $1/INDEX-OLD > INDEX-OLD grep -E '^/boot/' $1/INDEX-NEW > INDEX-NEW # Backup current kernel before installing a new one backup_kernel || return 1 # Install new files install_from_index INDEX-NEW || return 1 # Remove files which need to be deleted install_delete INDEX-OLD INDEX-NEW || return 1 # Update linker.hints if necessary if [ -s INDEX-OLD -o -s INDEX-NEW ]; then kldxref -R ${BASEDIR}/boot/ 2>/dev/null fi # We've finished updating the kernel. touch $1/kerneldone # Do we need to ask for a reboot now? if [ -f $1/kernelfirst ] && [ -s INDEX-OLD -o -s INDEX-NEW ]; then cat <<-EOF Kernel updates have been installed. Please reboot and run "$0 install" again to finish installing updates. EOF exit 0 fi fi # If we haven't already dealt with the world, deal with it. if ! [ -f $1/worlddone ]; then # Create any necessary directories first grep -vE '^/boot/' $1/INDEX-NEW | grep -E '^[^|]+\|d\|' > INDEX-NEW install_from_index INDEX-NEW || return 1 # Install new runtime linker grep -vE '^/boot/' $1/INDEX-NEW | grep -vE '^[^|]+\|d\|' | grep -E '^/libexec/ld-elf[^|]*\.so\.[0-9]+\|' > INDEX-NEW install_from_index INDEX-NEW || return 1 # Install new shared libraries next grep -vE '^/boot/' $1/INDEX-NEW | grep -vE '^[^|]+\|d\|' | grep -vE '^/libexec/ld-elf[^|]*\.so\.[0-9]+\|' | grep -E '^[^|]*/lib/[^|]*\.so\.[0-9]+\|' > INDEX-NEW install_from_index INDEX-NEW || return 1 # Deal with everything else grep -vE '^/boot/' $1/INDEX-OLD | grep -vE '^[^|]+\|d\|' | grep -vE '^/libexec/ld-elf[^|]*\.so\.[0-9]+\|' | grep -vE '^[^|]*/lib/[^|]*\.so\.[0-9]+\|' > INDEX-OLD grep -vE '^/boot/' $1/INDEX-NEW | grep -vE '^[^|]+\|d\|' | grep -vE '^/libexec/ld-elf[^|]*\.so\.[0-9]+\|' | grep -vE '^[^|]*/lib/[^|]*\.so\.[0-9]+\|' > INDEX-NEW install_from_index INDEX-NEW || return 1 install_delete INDEX-OLD INDEX-NEW || return 1 # Rebuild generated pwd files. if [ ${BASEDIR}/etc/master.passwd -nt ${BASEDIR}/etc/spwd.db ] || [ ${BASEDIR}/etc/master.passwd -nt ${BASEDIR}/etc/pwd.db ] || [ ${BASEDIR}/etc/master.passwd -nt ${BASEDIR}/etc/passwd ]; then pwd_mkdb -d ${BASEDIR}/etc -p ${BASEDIR}/etc/master.passwd fi # Rebuild /etc/login.conf.db if necessary. if [ ${BASEDIR}/etc/login.conf -nt ${BASEDIR}/etc/login.conf.db ]; then cap_mkdb ${BASEDIR}/etc/login.conf fi # Rebuild man page databases, if necessary. 
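		# (makewhatis is only run for a directory when some file
		# under it is newer than the existing mandoc.db, so an
		# update which touches no manual pages leaves the databases
		# alone.)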
for D in /usr/share/man /usr/share/openssl/man; do if [ ! -d ${BASEDIR}/$D ]; then continue fi if [ -z "$(find ${BASEDIR}/$D -type f -newer ${BASEDIR}/$D/mandoc.db)" ]; then continue; fi makewhatis ${BASEDIR}/$D done # We've finished installing the world and deleting old files # which are not shared libraries. touch $1/worlddone # Do we need to ask the user to portupgrade now? grep -vE '^/boot/' $1/INDEX-NEW | grep -E '^[^|]*/lib/[^|]*\.so\.[0-9]+\|' | cut -f 1 -d '|' | sort > newfiles if grep -vE '^/boot/' $1/INDEX-OLD | grep -E '^[^|]*/lib/[^|]*\.so\.[0-9]+\|' | cut -f 1 -d '|' | sort | join -v 1 - newfiles | grep -q .; then cat <<-EOF Completing this upgrade requires removing old shared object files. Please rebuild all installed 3rd party software (e.g., programs installed from the ports tree) and then run "$0 install" again to finish installing updates. EOF rm newfiles exit 0 fi rm newfiles fi # Remove old shared libraries grep -vE '^/boot/' $1/INDEX-NEW | grep -vE '^[^|]+\|d\|' | grep -E '^[^|]*/lib/[^|]*\.so\.[0-9]+\|' > INDEX-NEW grep -vE '^/boot/' $1/INDEX-OLD | grep -vE '^[^|]+\|d\|' | grep -E '^[^|]*/lib/[^|]*\.so\.[0-9]+\|' > INDEX-OLD install_delete INDEX-OLD INDEX-NEW || return 1 # Remove old directories grep -vE '^/boot/' $1/INDEX-NEW | grep -E '^[^|]+\|d\|' > INDEX-NEW grep -vE '^/boot/' $1/INDEX-OLD | grep -E '^[^|]+\|d\|' > INDEX-OLD install_delete INDEX-OLD INDEX-NEW || return 1 # Remove temporary files rm INDEX-OLD INDEX-NEW } # Rearrange bits to allow the installed updates to be rolled back install_setup_rollback () { # Remove the "reboot after installing kernel", "kernel updated", and # "finished installing the world" flags if present -- they are # irrelevant when rolling back updates. if [ -f ${BDHASH}-install/kernelfirst ]; then rm ${BDHASH}-install/kernelfirst rm ${BDHASH}-install/kerneldone fi if [ -f ${BDHASH}-install/worlddone ]; then rm ${BDHASH}-install/worlddone fi if [ -L ${BDHASH}-rollback ]; then mv ${BDHASH}-rollback ${BDHASH}-install/rollback fi mv ${BDHASH}-install ${BDHASH}-rollback } # Actually install updates install_run () { echo -n "Installing updates..." # Make sure we have all the files we should have install_verify ${BDHASH}-install/INDEX-OLD \ ${BDHASH}-install/INDEX-NEW || return 1 # Remove system immutable flag from files install_unschg ${BDHASH}-install/INDEX-OLD \ ${BDHASH}-install/INDEX-NEW || return 1 # Install new files, delete old files, and update linker.hints install_files ${BDHASH}-install || return 1 # Rearrange bits to allow the installed updates to be rolled back install_setup_rollback echo " done." } # Rearrange bits to allow the previous set of updates to be rolled back next. rollback_setup_rollback () { if [ -L ${BDHASH}-rollback/rollback ]; then mv ${BDHASH}-rollback/rollback rollback-tmp rm -r ${BDHASH}-rollback/ rm ${BDHASH}-rollback mv rollback-tmp ${BDHASH}-rollback else rm -r ${BDHASH}-rollback/ rm ${BDHASH}-rollback fi } # Install old files, delete new files, and update linker.hints rollback_files () { # Install old shared library files which don't have the same path as # a new shared library file. 
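	# (Rollback proceeds in stages: old libraries whose paths are not
	# reused by the new release go back first, then files outside /boot
	# and the library directories, then the remaining old libraries,
	# and finally the kernel files.)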
    grep -vE '^/boot/' $1/INDEX-NEW |
        grep -E '/lib/.*\.so\.[0-9]+\|' |
        cut -f 1 -d '|' |
        sort > INDEX-NEW.libs.flist
    grep -vE '^/boot/' $1/INDEX-OLD |
        grep -E '/lib/.*\.so\.[0-9]+\|' |
        sort -k 1,1 -t '|' - |
        join -t '|' -v 1 - INDEX-NEW.libs.flist > INDEX-OLD
    install_from_index INDEX-OLD || return 1

    # Deal with files which are neither kernel nor shared library
    grep -vE '^/boot/' $1/INDEX-OLD |
        grep -vE '/lib/.*\.so\.[0-9]+\|' > INDEX-OLD
    grep -vE '^/boot/' $1/INDEX-NEW |
        grep -vE '/lib/.*\.so\.[0-9]+\|' > INDEX-NEW
    install_from_index INDEX-OLD || return 1
    install_delete INDEX-NEW INDEX-OLD || return 1

    # Install any old shared library files which we didn't install above.
    grep -vE '^/boot/' $1/INDEX-OLD |
        grep -E '/lib/.*\.so\.[0-9]+\|' |
        sort -k 1,1 -t '|' - |
        join -t '|' - INDEX-NEW.libs.flist > INDEX-OLD
    install_from_index INDEX-OLD || return 1

    # Delete unneeded shared library files
    grep -vE '^/boot/' $1/INDEX-OLD |
        grep -E '/lib/.*\.so\.[0-9]+\|' > INDEX-OLD
    grep -vE '^/boot/' $1/INDEX-NEW |
        grep -E '/lib/.*\.so\.[0-9]+\|' > INDEX-NEW
    install_delete INDEX-NEW INDEX-OLD || return 1

    # Deal with kernel files
    grep -E '^/boot/' $1/INDEX-OLD > INDEX-OLD
    grep -E '^/boot/' $1/INDEX-NEW > INDEX-NEW
    install_from_index INDEX-OLD || return 1
    install_delete INDEX-NEW INDEX-OLD || return 1
    if [ -s INDEX-OLD -o -s INDEX-NEW ]; then
        kldxref -R /boot/ 2>/dev/null
    fi

    # Remove temporary files
    rm INDEX-OLD INDEX-NEW INDEX-NEW.libs.flist
}

# Actually rollback updates
rollback_run () {
    echo -n "Uninstalling updates..."

    # If there are updates waiting to be installed, remove them; we
    # want the user to re-run 'fetch' after rolling back updates.
    if [ -L ${BDHASH}-install ]; then
        rm -r ${BDHASH}-install/
        rm ${BDHASH}-install
    fi

    # Make sure we have all the files we should have
    install_verify ${BDHASH}-rollback/INDEX-NEW \
        ${BDHASH}-rollback/INDEX-OLD || return 1

    # Remove system immutable flag from files
    install_unschg ${BDHASH}-rollback/INDEX-NEW \
        ${BDHASH}-rollback/INDEX-OLD || return 1

    # Install old files, delete new files, and update linker.hints
    rollback_files ${BDHASH}-rollback || return 1

    # Remove the rollback directory and the symlink pointing to it; and
    # rearrange bits to allow the previous set of updates to be rolled
    # back next.
    rollback_setup_rollback

    echo " done."
}

# Compare INDEX-ALL and INDEX-PRESENT and print warnings about differences.
IDS_compare () {
    # Get all the lines which mismatch in something other than file
    # flags.  We ignore file flags because sysinstall doesn't seem to
    # set them when it installs FreeBSD; warning about these adds a
    # very large amount of noise.
    cut -f 1-5,7-8 -d '|' $1 > $1.noflags
    sort -k 1,1 -t '|' $1.noflags > $1.sorted
    cut -f 1-5,7-8 -d '|' $2 |
        comm -13 $1.noflags - |
        fgrep -v '|-|||||' |
        sort -k 1,1 -t '|' |
        join -t '|' $1.sorted - > INDEX-NOTMATCHING

    # Ignore files which match IDSIGNOREPATHS.
    for X in ${IDSIGNOREPATHS}; do
        grep -E "^${X}" INDEX-NOTMATCHING
    done |
        sort -u |
        comm -13 - INDEX-NOTMATCHING > INDEX-NOTMATCHING.tmp
    mv INDEX-NOTMATCHING.tmp INDEX-NOTMATCHING

    # Go through the lines and print warnings.
    local IFS='|'
    while read FPATH TYPE OWNER GROUP PERM HASH LINK P_TYPE P_OWNER P_GROUP P_PERM P_HASH P_LINK; do
        # Warn about different object types.
        if ! [ "${TYPE}" = "${P_TYPE}" ]; then
            echo -n "${FPATH} is a "
            case "${P_TYPE}" in
            f)
                echo -n "regular file, "
                ;;
            d)
                echo -n "directory, "
                ;;
            L)
                echo -n "symlink, "
                ;;
            esac
            echo -n "but should be a "
            case "${TYPE}" in
            f)
                echo -n "regular file."
                ;;
            d)
                echo -n "directory."
                ;;
            L)
                echo -n "symlink."
                ;;
            esac
            echo

            # Skip other tests, since they don't make sense if
            # we're comparing different object types.
            continue
        fi

        # Warn about different owners.
        if ! [ "${OWNER}" = "${P_OWNER}" ]; then
            echo -n "${FPATH} is owned by user id ${P_OWNER}, "
            echo "but should be owned by user id ${OWNER}."
        fi

        # Warn about different groups.
        if ! [ "${GROUP}" = "${P_GROUP}" ]; then
            echo -n "${FPATH} is owned by group id ${P_GROUP}, "
            echo "but should be owned by group id ${GROUP}."
        fi

        # Warn about different permissions.  We do not warn about
        # different permissions on symlinks, since some archivers
        # don't extract symlink permissions correctly and they are
        # ignored anyway.
        if ! [ "${PERM}" = "${P_PERM}" ] && ! [ "${TYPE}" = "L" ]; then
            echo -n "${FPATH} has ${P_PERM} permissions, "
            echo "but should have ${PERM} permissions."
        fi

        # Warn about different file hashes / symlink destinations.
        if ! [ "${HASH}" = "${P_HASH}" ]; then
            if [ "${TYPE}" = "L" ]; then
                echo -n "${FPATH} is a symlink to ${P_HASH}, "
                echo "but should be a symlink to ${HASH}."
            fi
            if [ "${TYPE}" = "f" ]; then
                echo -n "${FPATH} has SHA256 hash ${P_HASH}, "
                echo "but should have SHA256 hash ${HASH}."
            fi
        fi

        # We don't warn about different hard links, since some
        # archivers break hard links, and as long as the
        # underlying data is correct they really don't matter.
    done < INDEX-NOTMATCHING

    # Clean up
    rm $1 $1.noflags $1.sorted $2 INDEX-NOTMATCHING
}

# Do the work involved in comparing the system to a "known good" index
IDS_run () {
    workdir_init || return 1

    # Prepare the mirror list.
    fetch_pick_server_init && fetch_pick_server

    # Try to fetch the public key until we run out of servers.
    while ! fetch_key; do
        fetch_pick_server || return 1
    done

    # Try to fetch the metadata index signature ("tag") until we run
    # out of available servers; and sanity check the downloaded tag.
    while ! fetch_tag; do
        fetch_pick_server || return 1
    done
    fetch_tagsanity || return 1

    # Fetch INDEX-OLD and INDEX-ALL.
    fetch_metadata INDEX-OLD INDEX-ALL || return 1

    # Generate filtered INDEX-OLD and INDEX-ALL files containing only
    # the components we want and without anything marked as "Ignore".
    fetch_filter_metadata INDEX-OLD || return 1
    fetch_filter_metadata INDEX-ALL || return 1

    # Merge the INDEX-OLD and INDEX-ALL files into INDEX-ALL.
    sort INDEX-OLD INDEX-ALL > INDEX-ALL.tmp
    mv INDEX-ALL.tmp INDEX-ALL
    rm INDEX-OLD

    # Translate /boot/${KERNCONF} to ${KERNELDIR}
    fetch_filter_kernel_names INDEX-ALL ${KERNCONF}

    # Inspect the system and generate an INDEX-PRESENT file.
    fetch_inspect_system INDEX-ALL INDEX-PRESENT /dev/null || return 1

    # Compare INDEX-ALL and INDEX-PRESENT and print warnings about any
    # differences.
    IDS_compare INDEX-ALL INDEX-PRESENT
}

#### Main functions -- call parameter-handling and core functions

# Using the command line, configuration file, and defaults,
# set all the parameters which are needed later.
get_params () {
    init_params
    parse_cmdline $@
    parse_conffile
    default_params
}

# Fetch command.  Make sure that we're being called
# interactively, then run fetch_check_params and fetch_run
cmd_fetch () {
    if [ ! -t 0 -a $NOTTYOK -eq 0 ]; then
        echo -n "`basename $0` fetch should not "
        echo "be run non-interactively."
        echo "Run `basename $0` cron instead."
        exit 1
    fi
    fetch_check_params
    fetch_run || exit 1
    ISFETCHED=1
}

# Cron command.  Make sure the parameters are sensible; wait
# rand(3600) seconds; then fetch updates.  While fetching updates,
# send output to a temporary file; only print that file if the
# fetching failed.
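# A common way to drive the cron command is an /etc/crontab entry; the
# schedule below is only an example, not something this script installs:
#
#   0 3 * * *   root    /usr/sbin/freebsd-update cron
#
# The random sleep inside cmd_cron (`jot -r 1 0 3600` prints one random
# number between 0 and 3600) then spreads the actual fetch over the
# following hour so that every machine does not hit the mirrors at the
# same minute, and the output is mailed to ${MAILTO} only when fetching
# failed, updates were downloaded, or verbose debugging is enabled.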
cmd_cron () {
    fetch_check_params
    sleep `jot -r 1 0 3600`

    TMPFILE=`mktemp /tmp/freebsd-update.XXXXXX` || exit 1

    if ! fetch_run >> ${TMPFILE} ||
        ! grep -q "No updates needed" ${TMPFILE} ||
        [ ${VERBOSELEVEL} = "debug" ]; then
        mail -s "`hostname` security updates" ${MAILTO} < ${TMPFILE}
    fi

    rm ${TMPFILE}
}

# Fetch files for upgrading to a new release.
cmd_upgrade () {
    upgrade_check_params
    upgrade_run || exit 1
}

# Install downloaded updates.
cmd_install () {
    install_check_params
    install_run || exit 1
}

# Rollback most recently installed updates.
cmd_rollback () {
    rollback_check_params
    rollback_run || exit 1
}

# Compare system against a "known good" index.
cmd_IDS () {
    IDS_check_params
    IDS_run || exit 1
}

#### Entry point

# Make sure we find utilities from the base system
export PATH=/sbin:/bin:/usr/sbin:/usr/bin:${PATH}

# Set a pager if the user hasn't set one already
if [ -z "$PAGER" ]; then
    PAGER=/usr/bin/less
fi

# Set LC_ALL in order to avoid problems with character ranges like [A-Z].
export LC_ALL=C

get_params $@
for COMMAND in ${COMMANDS}; do
    cmd_${COMMAND}
done

Index: projects/clang900-import/usr.sbin/newsyslog/newsyslog.conf
===================================================================
--- projects/clang900-import/usr.sbin/newsyslog/newsyslog.conf	(revision 352536)
+++ projects/clang900-import/usr.sbin/newsyslog/newsyslog.conf	(revision 352537)
@@ -1,35 +1,36 @@
# configuration file for newsyslog
# $FreeBSD$
#
# Entries which do not specify the '/pid_file' field will cause the
# syslogd process to be signalled when that log file is rotated.  This
# action is only appropriate for log files which are written to by the
# syslogd process (ie, files listed in /etc/syslog.conf).  If there
# is no process which needs to be signalled when a given log file is
# rotated, then the entry for that file should include the 'N' flag.
#
# Note: some sites will want to select more restrictive protections than the
# defaults.  In particular, it may be desirable to switch many of the 644
# entries to 640 or 600.  For example, some sites will consider the
# contents of maillog, messages, and lpd-errs to be confidential.  In the
# future, these defaults may change to more conservative ones.
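#
# As an illustration of the fields documented below, the daemon.log entry
# added in this revision,
#
#	/var/log/daemon.log	644	5	1000	@0101T	JC
#
# keeps 5 rotated copies of the file with mode 644, rotates it once it
# grows past 1000 kB or at the calendar time given in the "when" field,
# compresses rotated copies with bzip2 (flag J), and creates the log file
# if it does not already exist (flag C).  Since no '/pid_file' field is
# given, syslogd is signalled after each rotation.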
#
# logfilename		[owner:group]	mode	count	size	when	flags [/pid_file] [sig_num]
/var/log/all.log		600	7	*	@T00	J
/var/log/auth.log		600	7	1000	@0101T	JC
/var/log/console.log		600	5	1000	*	J
/var/log/cron			600	3	1000	*	JC
/var/log/daily.log		640	7	*	@T00	JN
/var/log/debug.log		600	7	1000	*	JC
/var/log/init.log		644	3	1000	*	J
/var/log/kerberos.log		600	7	1000	*	J
/var/log/maillog		640	7	*	@T00	JC
/var/log/messages		644	5	1000	@0101T	JC
/var/log/monthly.log		640	12	*	$M1D0	JN
/var/log/devd.log		644	3	1000	*	JC
/var/log/security		600	10	1000	*	JC
/var/log/utx.log		644	3	*	@01T05	B
/var/log/weekly.log		640	5	*	$W6D0	JN
+/var/log/daemon.log		644	5	1000	@0101T	JC
<include> /etc/newsyslog.conf.d/[!.]*.conf
<include> /usr/local/etc/newsyslog.conf.d/[!.]*.conf

Index: projects/clang900-import/usr.sbin/ntp/libntp/Makefile
===================================================================
--- projects/clang900-import/usr.sbin/ntp/libntp/Makefile	(revision 352536)
+++ projects/clang900-import/usr.sbin/ntp/libntp/Makefile	(revision 352537)
@@ -1,93 +1,93 @@
# $FreeBSD$

.PATH: ${SRCTOP}/contrib/ntp/libntp \
	${SRCTOP}/contrib/ntp/lib/isc \
	${SRCTOP}/contrib/ntp/lib/isc/nls \
	${SRCTOP}/contrib/ntp/lib/isc/pthreads \
	${SRCTOP}/contrib/ntp/lib/isc/unix \

LIB=	ntp
INTERNALLIB=

NTP_SRCS=	systime.c a_md5encrypt.c adjtime.c atoint.c \
		atolfp.c atouint.c audio.c authkeys.c \
		authreadkeys.c authusekey.c bsd_strerror.c buftvtots.c \
		caljulian.c caltontp.c calyearstart.c clocktime.c \
		clocktypes.c decodenetnum.c dofptoa.c dolfptoa.c \
		emalloc.c findconfig.c getopt.c hextoint.c \
		hextolfp.c humandate.c icom.c iosignal.c \
		is_ip_address.c \
		lib_strbuf.c \
		libssl_compat.c \
		machines.c mktime.c modetoa.c \
		mstolfp.c msyslog.c netof.c ntp_calendar.c \
		ntp_crypto_rnd.c ntp_intres.c ntp_libopts.c \
		ntp_lineedit.c ntp_random.c ntp_rfc2553.c ntp_worker.c \
		numtoa.c numtohost.c octtoint.c prettydate.c \
		recvbuff.c refidsmear.c \
		refnumtoa.c snprintf.c socket.c \
		socktoa.c socktohost.c ssl_init.c statestr.c \
		strdup.c strl_obsd.c syssignal.c timetoa.c \
		timevalops.c uglydate.c vint64ops.c work_fork.c \
		work_thread.c xsbprintf.c ymd2yd.c

ISC_PTHREADS_SRCS=	condition.c \
			thread.c \
			mutex.c

ISC_UNIX_SRCS=	dir.c \
		errno2result.c \
		file.c \
		interfaceiter.c \
		net.c \
		stdio.c \
		stdtime.c \
		strerror.c \
		time.c \
		tsmemcmp.c

ISC_NLS_SRCS=	msgcat.c

ISC_SRCS=	assertions.c \
		buffer.c \
		backtrace-emptytbl.c \
		backtrace.c \
		error.c \
		event.c \
		inet_ntop.c \
		inet_pton.c \
		lib.c \
		log.c \
		md5.c \
		netaddr.c \
		netscope.c \
		ondestroy.c \
		random.c \
		result.c \
		task.c \
		sha1.c \
		sockaddr.c \
		${ISC_NLS_SRCS} \
		${ISC_PTHREADS_SRCS} \
		${ISC_UNIX_SRCS}

SRCS=	${NTP_SRCS} ${ISC_SRCS} version.c

CFLAGS+= -I${SRCTOP}/contrib/ntp/include \
	-I${SRCTOP}/contrib/ntp/lib/isc/include \
	-I${SRCTOP}/contrib/ntp/lib/isc/unix/include \
	-I${SRCTOP}/contrib/ntp/lib/isc/pthreads/include \
	-I${SRCTOP}/contrib/ntp/sntp/libopts \
	-I${SRCTOP}/lib/libc/${MACHINE_ARCH} \
	-I${SYSROOT:U${DESTDIR}}/${INCLUDEDIR}/edit \
	-I${.CURDIR:H} \
	-I${.CURDIR}/

-CFLAGS+= -DHAVE_BSD_NICE -DHAVE_STDINT_H
+CFLAGS+= -DHAVE_BSD_NICE -DHAVE_STDINT_H -DHAVE_CLOSEFROM

CLEANFILES+= .version version.c

version.c:
	sh -e ${.CURDIR:H}/scripts/mkver ntpd

.include <bsd.lib.mk>

Index: projects/clang900-import/usr.sbin/pkg/Makefile
===================================================================
--- projects/clang900-import/usr.sbin/pkg/Makefile	(revision 352536)
+++ projects/clang900-import/usr.sbin/pkg/Makefile	(revision 352537)
@@ -1,16 +1,26 @@
# $FreeBSD$

+.if ${MACHINE} != "amd64" && ${MACHINE} != "i386"
+PKGCONFBRANCH?=	quarterly
+.else
+_BRANCH!=	${MAKE} -C ${SRCTOP}/release -V BRANCH
+BRANCH?=	${_BRANCH}
+. if ${BRANCH:MBETA*} || ${BRANCH:MRC*} || ${BRANCH:MRELEASE*}
+PKGCONFBRANCH?=	quarterly
+. else
PKGCONFBRANCH?=	latest
+. endif
+.endif
CONFS=	FreeBSD.conf.${PKGCONFBRANCH}
CONFSNAME=	FreeBSD.conf
CONFSDIR=	/etc/pkg
CONFSMODE=	644
PROG=	pkg
SRCS=	pkg.c dns_utils.c config.c
MAN=	pkg.7

CFLAGS+=-I${SRCTOP}/contrib/libucl/include
.PATH:	${SRCTOP}/contrib/libucl/include

LIBADD=	archive fetch ucl sbuf crypto ssl

.include <bsd.prog.mk>

Index: projects/clang900-import/usr.sbin/syslogd/syslog.conf
===================================================================
--- projects/clang900-import/usr.sbin/syslogd/syslog.conf	(revision 352536)
+++ projects/clang900-import/usr.sbin/syslogd/syslog.conf	(revision 352537)
@@ -1,34 +1,35 @@
# $FreeBSD$
#
# Spaces ARE valid field separators in this file. However,
# other *nix-like systems still insist on using tabs as field
# separators. If you are sharing this file between systems, you
# may want to use only tabs as field separators here.
# Consult the syslog.conf(5) manpage.
*.err;kern.warning;auth.notice;mail.crit		/dev/console
*.notice;authpriv.none;kern.debug;lpr.info;mail.crit;news.err	/var/log/messages
security.*					/var/log/security
auth.info;authpriv.info				/var/log/auth.log
mail.info					/var/log/maillog
cron.*						/var/log/cron
!-devd
*.=debug					/var/log/debug.log
*.emerg						*
+daemon.info					/var/log/daemon.log
# uncomment this to log all writes to /dev/console to /var/log/console.log
# touch /var/log/console.log and chmod it to mode 600 before it will work
#console.info					/var/log/console.log
# uncomment this to enable logging of all log messages to /var/log/all.log
# touch /var/log/all.log and chmod it to mode 600 before it will work
#*.*						/var/log/all.log
# uncomment this to enable logging to a remote loghost named loghost
#*.*						@loghost
# uncomment these if you're running inn
# news.crit					/var/log/news/news.crit
# news.err					/var/log/news/news.err
# news.notice					/var/log/news/news.notice
# Uncomment this if you wish to see messages produced by devd
# !devd
# *.>=notice					/var/log/devd.log
!*
include						/etc/syslog.d
include						/usr/local/etc/syslog.d

Index: projects/clang900-import
===================================================================
--- projects/clang900-import	(revision 352536)
+++ projects/clang900-import	(revision 352537)

Property changes on: projects/clang900-import
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r352436-352536
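The new conditional block in usr.sbin/pkg/Makefile decides which pkg(7) bootstrap
configuration is installed as /etc/pkg/FreeBSD.conf: machines other than amd64 and
i386, and release-like branches (BETA/RC/RELEASE), get the quarterly package set,
while everything else gets latest. A rough sh rendering of that make logic, offered
only as a sketch (the /usr/src path and the use of uname -m in place of ${MACHINE}
are assumptions, not something the Makefile itself does):

    #!/bin/sh
    # Approximate the PKGCONFBRANCH selection from usr.sbin/pkg/Makefile.
    MACHINE=$(uname -m)
    BRANCH=$(make -C /usr/src/release -V BRANCH)  # e.g. CURRENT, BETA1, RELEASE

    if [ "${MACHINE}" != "amd64" ] && [ "${MACHINE}" != "i386" ]; then
        PKGCONFBRANCH="quarterly"
    else
        case "${BRANCH}" in
        BETA*|RC*|RELEASE*)
            PKGCONFBRANCH="quarterly"
            ;;
        *)
            PKGCONFBRANCH="latest"
            ;;
        esac
    fi
    echo "FreeBSD.conf would come from the ${PKGCONFBRANCH} branch"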